---
title: "Aggregate Facebook Data"
output: html_notebook
---

# 0. Packages

```{r, message = FALSE, warning=FALSE}
#install.packages("sf")
#install.packages("rgdal")
#install.packages("tidyverse")
library(sf)
library(rgdal)
library(tidyverse)

# This is where to get US census tracts
# https://rdrr.io/cran/tigris/man/tracts.html
# tracts(state, cb = TRUE, year = 2019, class = "sf")

```


# 1. Aggregate to Prefecture/County level

```{r, eval = FALSE}
# Load the North East Asia Albers Equal Area Conic Project
eqac = "+proj=aea +lat_1=15 +lat_2=65 +lat_0=30 +lon_0=95 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"

```


```{r, message = FALSE, warning = FALSE, eval = FALSE}
# Next, Import Japan municipal shapefile 

# Gather a list of Japanese municipalities
read_csv("indices_wide.csv") %>%
  select(muni_code, muni, pref) %>%
  distinct() %>%
  mutate(muni_code = muni_code %>% str_pad(width = 5, side = "left", pad = "0")) %>%
  write_csv("muni.csv")

# Read in our shapefile of municipal political boundaries
jp = read_sf("shapefiles/japan_shapefile/polbnda_jpn.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
  st_transform(
    CRS(eqac)) %>%
  # now add Municipality code
  left_join(by = c("adm_code" = "muni_code"), y = read_csv("muni.csv")) %>%
  # Now get rid of missing adm_code
  filter(!is.na(adm_code)) %>%
  # Now grab just the main entries
  select(geometry, pref, muni, muni_code = adm_code)

# Save this to file
jp %>% 
  saveRDS("nodes/japan_hagibis.csv")

# Evacuation included sources and destinations across 39 of Japan's 47 prefectures. 
# Let's keep the entire network.
read_csv("aggregate/japan_hagibis.csv") %>% 
  select(from_code, to_code) %>%
  pivot_longer(
    cols = c(from_code, to_code)) %>%
  select(value) %>% 
  mutate(value = value %>% str_sub(1,2)) %>%
  unique()
```


## Typhoon Hagibis

```{r}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/japan_hagibis.csv") %>% 
  # Remove unnecessary elements from geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_lon = geometry %>% word(1),
    from_lat = geometry %>% word(2),
    to_lon = geometry %>% word(3),
    to_lat = geometry %>% word(4)) %>%
  # Convert to numeric
  mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
  write_csv("temp/temp.csv")

##################################################
# Identify corresponding code for Facebook id
##################################################
bind_rows(
  read_csv("temp/temp.csv") %>% 
    select(id = from_id, lon = from_lon, lat = from_lat),
  read_csv("temp/temp.csv") %>%
    select(id = to_id, lon = to_lon, lat = to_lat)) %>%
  # Eliminate duplicates
  distinct() %>%
  # Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("lon", "lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers North Asia Equal-AreaConic projection
  st_transform(CRS(eqac)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(jp %>%
            select(muni_code = muni_code)) %>%
  as.data.frame() %>% select(-geometry) %>%
  distinct() %>%
  # Export geocoded points to file
  write_csv("temp/points.csv")

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################
read_csv("temp/temp.csv") %>%
  select(datetime, from_id, to_id, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = c("from_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(from_code = muni_code)) %>%
  left_join(by = c("to_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(to_code = muni_code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/japan_hagibis.csv")

##############################################################
# Repeat but just for moves under 30 km
##############################################################
read_csv("temp/temp.csv") %>%
  select(datetime, from_id, to_id, km, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = c("from_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(from_code = muni_code)) %>%
  left_join(by = c("to_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(to_code = muni_code)) %>%
  filter(km < 30) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/japan_hagibis_under_30km.csv")

##############################################################
# Repeat but just for moves equal to or over 30 km
##############################################################
read_csv("temp/temp.csv") %>%
  select(datetime, from_id, to_id, km, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = c("from_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(from_code = muni_code)) %>%
  left_join(by = c("to_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(to_code = muni_code)) %>%
  filter(km >= 30) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/japan_hagibis_over_30km.csv")
```

## Chiba Floods

```{r}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/flood_chiba.csv") %>% 
  # Remove unnecessary elements from geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_lon = geometry %>% word(1),
    from_lat = geometry %>% word(2),
    to_lon = geometry %>% word(3),
    to_lat = geometry %>% word(4)) %>%
  # Convert to numeric
  mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
  write_csv("temp/temp.csv")

##################################################
# Identify corresponding code for Facebook id
##################################################
bind_rows(
  read_csv("temp/temp.csv") %>% 
    select(id = from_id, lon = from_lon, lat = from_lat),
  read_csv("temp/temp.csv") %>%
    select(id = to_id, lon = to_lon, lat = to_lat)) %>%
  # Eliminate duplicates
  distinct() %>%
  # Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("lon", "lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers North Asia Equal-AreaConic projection
  st_transform(CRS(eqac)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(jp %>%
            select(muni_code = muni_code)) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  # Export geocoded points to file
  write_csv("temp/points.csv")

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################

read_csv("temp/temp.csv") %>%
  select(datetime, from_id, to_id, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = c("from_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(from_code = muni_code)) %>%
  left_join(by = c("to_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(to_code = muni_code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/flood_chiba.csv")


##############################################################
# Repeat but just for moves under 30 km
##############################################################
read_csv("temp/temp.csv") %>%
  select(datetime, from_id, to_id, km, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = c("from_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(from_code = muni_code)) %>%
  left_join(by = c("to_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(to_code = muni_code)) %>%
  filter(km < 30) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/flood_chiba_under_30km.csv")

##############################################################
# Repeat but just for moves equal to or over 30 km
##############################################################
read_csv("temp/temp.csv") %>%
  select(datetime, from_id, to_id, km, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = c("from_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(from_code = muni_code)) %>%
  left_join(by = c("to_id" = "id"),
            y = read_csv("temp/points.csv") %>%
              rename(to_code = muni_code)) %>%
  filter(km >= 30) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/flood_chiba_over_30km.csv")
```

```{r}
#read_csv("japan_timeseries_data.csv") %>%
#  filter(year >= 2017)
```


```{r}
# These are the unique prefecture  codes involved
read_csv("aggregate/flood_chiba.csv") %>% 
  select(from_code, to_code) %>% 
  pivot_longer(cols = c(from_code, to_code)) %>%
  select(-name) %>%
  distinct() %>%
  mutate(state = value %>% str_sub(1,2)) %>%
  select(state) %>%
  unique()
# Looks like the Chiba floods online sent people between 08, 11, 12, 13, 14
# That means Ibaraki (08), Saitama (11), Chiba (12), Tokyo (13), and Kanagawa (14)
# As a result, we confine the nodelist here to just Kanto Prefectures
read_csv("pref.csv") %>%
  filter(region == "Kanto")

# Save a smaller list of nodes for just the Kanto region
readRDS("nodes/japan_hagibis.csv") %>% 
  filter(pref %in% c("Ibaraki", "Tochigi", "Gunma", 
                     "Saitama", "Chiba", "Tokyo", "Kanagawa")) %>%
  saveRDS("nodes/flood_chiba.csv")
```


## Floods in Jackson Mississippi (no distance)

```{r}
# For the US, let's use the Contiguous US Albers Equal Area Conic Projection
# Contiguous United States NAD27 Albers Equal Area
# https://spatialreference.org/ref/sr-org/7271/

aea <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=clrk66 +units=m +no_defs" 

mississippi <- read_sf("shapefiles/mississippi/tl_2019_28_cousub.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))

arkansas <- read_sf("shapefiles/arkansas/tl_2019_05_cousub.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))

alabama <- read_sf("shapefiles/alabama/tl_2019_01_cousub.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))

louisiana <- read_sf("shapefiles/louisiana/tl_2019_22_cousub.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))

# Bind together county subdivisions for four states
shape <- rbind(
  mississippi,
  arkansas,
  alabama,
  louisiana
) %>%
  mutate(code = paste(state, county, sub, sep = "")) %>%
  select(-state, -county, -sub)

shape %>%
  saveRDS("nodes/flood_jackson.csv")
remove(alabama, arkansas, louisiana, mississippi)
```


```{r}
# These are the unique state FIPS codes involved
read_csv("aggregate/flood_jackson.csv") %>% 
  select(from_code, to_code) %>% 
  pivot_longer(cols = c(from_code, to_code)) %>%
  select(-name) %>%
  distinct() %>%
  mutate(state = value %>% str_sub(1,2)) %>%
  select(state) %>%
  unique()

```



```{r, message = FALSE, warning = FALSE}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/flood_jackson.csv") %>% 
  # Convert to numeric
  mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
  # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  write_csv("temp/temp.csv")


##################################################
# Identify corresponding code for Facebook id
##################################################

read_csv("temp/temp.csv") %>% 
  select(id, from_lon, from_lat) %>%
  # Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("from_lon", "from_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  write_csv("temp/from_points.csv")


  
read_csv("temp/temp.csv") %>%
  select(id, to_lon, to_lat) %>%
# Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("to_lon", "to_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  # Export geocoded points to file
  write_csv("temp/to_points.csv")

remove(shape)

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################

read_csv("temp/temp.csv") %>%
  select(id, datetime, from_name, to_name, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = "id",
            y = read_csv("temp/from_points.csv") %>%
              rename(from_code = code)) %>%
  left_join(by = "id",
            y = read_csv("temp/to_points.csv") %>%
              rename(to_code = code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/flood_jackson.csv")
```



## Tornado in Nashville

```{r}


aea <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=clrk66 +units=m +no_defs" 

# mississippi <- read_sf("shapefiles/mississippi/tl_2019_28_cousub.shp") %>%
#  st_transform(
#    CRS(aea)) %>%
#    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
#    filter(!is.na(sub))

#arkansas <- read_sf("shapefiles/arkansas/tl_2019_05_cousub.shp") %>%
 # st_transform(
#    CRS(aea)) %>%
 #   select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
  #  filter(!is.na(sub))

#alabama <- read_sf("shapefiles/alabama/tl_2019_01_cousub.shp") %>%
#  st_transform(
#    CRS(aea)) %>%
#    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
#    filter(!is.na(sub))

#missouri <- read_sf("shapefiles/missouri/tl_2019_29_cousub.shp") %>%
#  st_transform(
#    CRS(aea)) %>%
#    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
#    filter(!is.na(sub))

tennessee <- read_sf("shapefiles/tennessee/tl_2019_47_cousub.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))

# georgia <- read_sf("shapefiles/georgia/tl_2019_13_cousub.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
#  st_transform(
#    CRS(aea)) %>%
#    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
#    filter(!is.na(sub))

#kentucky <- read_sf("shapefiles/kentucky/tl_2019_21_cousub.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
#  st_transform(
#    CRS(aea)) %>%
#    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
#    filter(!is.na(sub))

# Bind together county subdivisions for four states
shape <- rbind(
#  alabama, arkansas, mississippi, 
#  kentucky, georgia, missouri 
  tennessee) %>%
  mutate(code = paste(state, county, sub, sep = "")) %>%
  select(-state, -county, -sub)

remove(tennessee)
       #alabama, arkansas, mississippi, 
  # kentucky, georgia, , missouri

shape %>%
  saveRDS("nodes/tornado_nashville.csv")

# missouri (29), 
# kentucky (21), 
# tennessee (47), 
# alabama (01), 
# mississippi (28), 
# arkansas (05), 
# georgia (13)
```




```{r, message = FALSE, warning = FALSE}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/tornado_nashville.csv") %>% 
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
  # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  # edit geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_lon = geometry %>% word(1),
    from_lat = geometry %>% word(2),
    to_lon = geometry %>% word(3),
    to_lat = geometry %>% word(4)) %>%
  # Convert to numeric
  mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
  write_csv("temp/temp.csv")

##################################################
# Identify corresponding code for Facebook id
##################################################

read_csv("temp/temp.csv") %>% 
  select(id, from_lon, from_lat) %>%
  # Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("from_lon", "from_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  write_csv("temp/from_points.csv")


  
read_csv("temp/temp.csv") %>%
  select(id, to_lon, to_lat) %>%
# Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("to_lon", "to_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  # Export geocoded points to file
  write_csv("temp/to_points.csv")

remove(shape)

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################

read_csv("temp/temp.csv") %>%
  select(id, datetime, from_name, to_name, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = "id",
            y = read_csv("temp/from_points.csv") %>%
              rename(from_code = code)) %>%
  left_join(by = "id",
            y = read_csv("temp/to_points.csv") %>%
              rename(to_code = code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/tornado_nashville.csv")
```

## California Disasters

```{r}


aea <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=clrk66 +units=m +no_defs" 


cali <- read_sf("shapefiles/california/tl_2019_06_cousub.shp") %>%
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))

nevada <- read_sf("shapefiles/nevada/tl_2019_32_cousub.shp") %>%
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))


arizona <- read_sf("shapefiles/arizona/tl_2019_04_cousub.shp") %>%
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))


# Bind together county subdivisions for four states
shape <- rbind(
  cali, nevada, arizona) %>%
  mutate(code = paste(state, county, sub, sep = "")) %>%
  select(-state, -county, -sub)

shape %>%
  saveRDS("nodes/california.csv")

# California shutoff is just Cali and Nevada
# Southern California Shutoff is Cali, Nevada, and Arizona
# Getty Fire, Hillside Fire, Cave Fire,  is just California


remove(cali, nevada, arizona)
```




```{r, message = FALSE, warning = FALSE}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/california_shutoff.csv") %>% 
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
    # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  write_csv("temp/temp.csv")

# Break this one up, since the file is so large.
read_csv("temp/temp.csv") %>%
  # edit geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_lon = geometry %>% word(1),
    from_lat = geometry %>% word(2),
    to_lon = geometry %>% word(3),
    to_lat = geometry %>% word(4)) %>%
  # Convert to numeric
  mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
  write_csv("temp/temp.csv")

##################################################
# Identify corresponding code for Facebook id
##################################################
read_csv("temp/temp.csv") %>% 
  select(id, from_lon, from_lat) %>%
  # Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("from_lon", "from_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  write_csv("temp/from_points.csv")


  
read_csv("temp/temp.csv") %>%
  select(id, to_lon, to_lat) %>%
# Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("to_lon", "to_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  # Export geocoded points to file
  write_csv("temp/to_points.csv")

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################

read_csv("temp/temp.csv") %>%
  select(id, datetime, from_name, to_name, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = "id",
            y = read_csv("temp/from_points.csv") %>%
              rename(from_code = code)) %>%
  left_join(by = "id",
            y = read_csv("temp/to_points.csv") %>%
              rename(to_code = code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/california_shutoff.csv")
```




```{r, message = FALSE, warning = FALSE}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/cave_fire_santa_barbara.csv") %>% 
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
  # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  # edit geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_lon = geometry %>% word(1),
    from_lat = geometry %>% word(2),
    to_lon = geometry %>% word(3),
    to_lat = geometry %>% word(4)) %>%
  # Convert to numeric
  mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
  write_csv("temp/temp.csv")

##################################################
# Identify corresponding code for Facebook id
##################################################

read_csv("temp/temp.csv") %>% 
  select(id, from_lon, from_lat) %>%
  # Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("from_lon", "from_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  write_csv("temp/from_points.csv")


  
read_csv("temp/temp.csv") %>%
  select(id, to_lon, to_lat) %>%
# Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("to_lon", "to_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  # Export geocoded points to file
  write_csv("temp/to_points.csv")

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################

read_csv("temp/temp.csv") %>%
  select(id, datetime, from_name, to_name, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = "id",
            y = read_csv("temp/from_points.csv") %>%
              rename(from_code = code)) %>%
  left_join(by = "id",
            y = read_csv("temp/to_points.csv") %>%
              rename(to_code = code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/cave_fire_santa_barbara.csv")
```



```{r, message = FALSE, warning = FALSE}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/getty_fire.csv") %>% 
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
  # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  # edit geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_lon = geometry %>% word(1),
    from_lat = geometry %>% word(2),
    to_lon = geometry %>% word(3),
    to_lat = geometry %>% word(4)) %>%
  # Convert to numeric
  mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
  write_csv("temp/temp.csv")

##################################################
# Identify corresponding code for Facebook id
##################################################

read_csv("temp/temp.csv") %>% 
  select(id, from_lon, from_lat) %>%
  # Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("from_lon", "from_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  write_csv("temp/from_points.csv")


  
read_csv("temp/temp.csv") %>%
  select(id, to_lon, to_lat) %>%
# Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("to_lon", "to_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  # Export geocoded points to file
  write_csv("temp/to_points.csv")

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################

read_csv("temp/temp.csv") %>%
  select(id, datetime, from_name, to_name, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = "id",
            y = read_csv("temp/from_points.csv") %>%
              rename(from_code = code)) %>%
  left_join(by = "id",
            y = read_csv("temp/to_points.csv") %>%
              rename(to_code = code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/getty_fire.csv")
```



```{r, message = FALSE, warning = FALSE}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/hillside_fire_san_bernardino.csv") %>% 
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
  # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  # edit geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_lon = geometry %>% word(1),
    from_lat = geometry %>% word(2),
    to_lon = geometry %>% word(3),
    to_lat = geometry %>% word(4)) %>%
  # Convert to numeric
  mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
  write_csv("temp/temp.csv")

##################################################
# Identify corresponding code for Facebook id
##################################################

read_csv("temp/temp.csv") %>% 
  select(id, from_lon, from_lat) %>%
  # Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("from_lon", "from_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  write_csv("temp/from_points.csv")


  
read_csv("temp/temp.csv") %>%
  select(id, to_lon, to_lat) %>%
# Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("to_lon", "to_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  # Export geocoded points to file
  write_csv("temp/to_points.csv")

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################

read_csv("temp/temp.csv") %>%
  select(id, datetime, from_name, to_name, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = "id",
            y = read_csv("temp/from_points.csv") %>%
              rename(from_code = code)) %>%
  left_join(by = "id",
            y = read_csv("temp/to_points.csv") %>%
              rename(to_code = code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/hillside_fire_san_bernardino.csv")
```



```{r, message = FALSE, warning = FALSE}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/southern_california_shutoff.csv") %>% 
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
    # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  write_csv("temp/temp.csv")

# Break this one up, since the file is so large.
read_csv("temp/temp.csv") %>%
  # edit geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_lon = geometry %>% word(1),
    from_lat = geometry %>% word(2),
    to_lon = geometry %>% word(3),
    to_lat = geometry %>% word(4)) %>%
  # select just main variables to reduce file size
  select(id, datetime, from_name, to_name, n_baseline, n_crisis,
         from_lon, from_lat, to_lon, to_lat) %>%
  # Convert to numeric
  mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
  write_csv("temp/temp.csv")
##################################################
# Identify corresponding code for Facebook id
##################################################

read_csv("temp/temp.csv") %>% 
  select(id, from_lon, from_lat) %>%
  # Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("from_lon", "from_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  write_csv("temp/from_points.csv")


  
read_csv("temp/temp.csv") %>%
  select(id, to_lon, to_lat) %>%
# Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("to_lon", "to_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(aea)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  # Export geocoded points to file
  write_csv("temp/to_points.csv")

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################

read_csv("temp/temp.csv") %>%
  select(id, datetime, from_name, to_name, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = "id",
            y = read_csv("temp/from_points.csv") %>%
              rename(from_code = code)) %>%
  left_join(by = "id",
            y = read_csv("temp/to_points.csv") %>%
              rename(to_code = code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/southern_california_shutoff.csv")
```

```{r}
remove(shape)

```


## Hurricane Dorian Florida

```{r}
#  Alabama, Georgia, Florida

aea <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=clrk66 +units=m +no_defs" 




alabama <- read_sf("shapefiles/alabama/tl_2019_01_cousub.shp") %>%
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))


florida <- read_sf("shapefiles/florida/tl_2019_12_cousub.shp") %>%
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))

georgia <- read_sf("shapefiles/georgia/tl_2019_13_cousub.shp") %>%
  st_transform(
    CRS(aea)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))


# Bind together county subdivisions for four states
shape <- rbind(
  alabama, florida, georgia) %>%
  mutate(code = paste(state, county, sub, sep = "")) %>%
  select(-state, -county, -sub)

shape %>%
  saveRDS("nodes/hurricane_dorian_florida.csv")
#read_csv("aggregate/hurricane_dorian_florida.csv") %>%
#  select(from_code, to_code) %>%
#  pivot_longer(cols = c(from_code, to_code)) %>%
#  select(value) %>%
#  mutate(value = value %>% 
#           str_pad(width = "10", side = "left", pad = "0") %>%  
#           str_sub(1,2)) %>% 
#  unique()

# 10, 12, 13

remove(alabama,  georgia, florida)
```



```{r, message = FALSE, warning = FALSE}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/hurricane_dorian_florida.csv") %>% 
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
    # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  write_csv("temp/temp.csv")
```


```{r}
# Write a function to break up the data and consolidate

# Grab cases
n = (read_csv("temp/temp.csv") %>% dim())[1]

# Grab breaks
breaks = (n/20) %>% round()

reformat = function(num){
  read_csv("temp/temp.csv") %>%
    slice(num:(num+breaks)) %>%
    # Break this one up, since the file is so large.
    # edit geometry
    mutate(geometry = geometry %>% 
             str_remove_all("[)]|[(]") %>% 
             str_remove("LINESTRING ") %>% 
             str_remove("[,]")) %>%
    # Extract longitude and latitude from geometry
    mutate(
      from_lon = geometry %>% word(1),
      from_lat = geometry %>% word(2),
      to_lon = geometry %>% word(3),
      to_lat = geometry %>% word(4)) %>%
    # select just main variables to reduce file size
    select(id, datetime, from_name, to_name, n_baseline, n_crisis,
           from_lon, from_lat, to_lon, to_lat) %>%
    # Convert to numeric
    mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
    return()
}

# Now run the code and reformat each chunkc of 20000 rows separately,
# Then bind them together and save them
seq(from = 1, to = n, by = breaks) %>%
  lapply(reformat) %>%
  bind_rows() %>%
  write_csv("temp/temp.csv")
```

```{r}
##################################################
# Identify corresponding code for Facebook id
##################################################

get_from_points = function(num){
  
  read_csv("temp/temp.csv") %>% 
    slice(num:(num + breaks)) %>%
    select(id, from_lon, from_lat) %>%
    # Next, let's grab the ending coordinates of the move
    # and register these as the geography of the point
    st_as_sf(coords = c("from_lon", "from_lat")) %>%
    # Set initial projection as World Map,
    st_set_crs(4326) %>%
    # And then reformat to Albers Equal Area Projection for the Continguous US
    st_transform(CRS(aea)) %>%
    # Last, we join the traits of the polygons from jp to the points that lie within them
    st_join(shape) %>%
    as.data.frame() %>% select(-geometry) %>%
    # Narrow down to just distinct Facebook id-muni_code pairs
    distinct() %>%
    return()
}

seq(from = 1, to = n, by = breaks) %>%
  lapply(get_from_points) %>%
  bind_rows() %>%
  write_csv("temp/from_points.csv")


get_to_points = function(num){
  
  read_csv("temp/temp.csv") %>% 
    slice(num:(num + breaks)) %>%
    select(id, to_lon, to_lat) %>%
    # Next, let's grab the ending coordinates of the move
    # and register these as the geography of the point
    st_as_sf(coords = c("to_lon", "to_lat")) %>%
    # Set initial projection as World Map,
    st_set_crs(4326) %>%
    # And then reformat to Albers Equal Area Projection for the Continguous US
    st_transform(CRS(aea)) %>%
    # Last, we join the traits of the polygons from jp to the points that lie within them
    st_join(shape) %>%
    as.data.frame() %>% select(-geometry) %>%
    # Narrow down to just distinct Facebook id-muni_code pairs
    distinct() %>%
    return()
  
}

seq(from = 1, to = n, by = breaks) %>%
  lapply(get_to_points) %>%
  bind_rows() %>%
# Export geocoded points to file
  write_csv("temp/to_points.csv")


remove(shape)

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################

read_csv("temp/temp.csv") %>%
  select(id, datetime, from_name, to_name, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = "id",
            y = read_csv("temp/from_points.csv") %>%
              rename(from_code = code)) %>%
  left_join(by = "id",
            y = read_csv("temp/to_points.csv") %>%
              rename(to_code = code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(from_code = from_code %>% 
           str_pad(width = "10", side = "left", pad = "0"),
         to_code = to_code %>% 
           str_pad(width = "10", side = "left", pad = "0")) %>%

  # Export to file
  write_csv("aggregate/hurricane_dorian_florida.csv")
```


## Puerto Rico

```{r}


pr <- "+proj=lcc +lat_1=18.43333333333333 +lat_2=18.03333333333333 +lat_0=17.83333333333333 +lon_0=-66.43333333333334 +x_0=200000 +y_0=200000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs" 


puertorico <- read_sf("shapefiles/puertorico/tl_2019_72_cousub.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
  st_transform(
    CRS(pr)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))


caymanislands <- read_sf("shapefiles/caymanislands/tl_2019_78_cousub.shp") %>%
  # Transform to Asia North Albers Equal Area Conic projection
  # Obtained here: https://spatialreference.org/ref/esri/102025/
  st_transform(
    CRS(pr)) %>%
    select(state = STATEFP, county = COUNTYFP, sub = COUSUBFP) %>%
    filter(!is.na(sub))

# Bind together county subdivisions for four states
shape <- rbind(
  puertorico, caymanislands) %>%
  mutate(code = paste(state, county, sub, sep = "")) %>%
  select(-state, -county, -sub)

shape %>%
  saveRDS("nodes/earthquake_pr.csv")


remove(caymanislands, puertorico)
```



```{r, message = FALSE, warning = FALSE}
###############################################
# Obtain geographic coordinates for Facebook observations
###############################################
read_csv("data/earthquake_pr.csv") %>% 
  # Give each edge a unique ID
  mutate(id = 1:n()) %>%
    # Keep only complete observations
  filter(!is.na(n_baseline) & !is.na(n_crisis)) %>%
  write_csv("temp/temp.csv")

# Break this one up, since the file is so large.
read_csv("temp/temp.csv") %>%
  # edit geometry
  mutate(geometry = geometry %>% 
           str_remove_all("[)]|[(]") %>% 
           str_remove("LINESTRING ") %>% 
           str_remove("[,]")) %>%
  # Extract longitude and latitude from geometry
  mutate(
    from_lon = geometry %>% word(1),
    from_lat = geometry %>% word(2),
    to_lon = geometry %>% word(3),
    to_lat = geometry %>% word(4)) %>%
  # Convert to numeric
  mutate_at(vars(from_lon, from_lat, to_lon, to_lat), as.numeric) %>%
  write_csv("temp/temp.csv")

##################################################
# Identify corresponding code for Facebook id
##################################################
read_csv("temp/temp.csv") %>% 
  select(id, from_lon, from_lat) %>%
  # Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("from_lon", "from_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(pr)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  write_csv("temp/from_points.csv")


  
read_csv("temp/temp.csv") %>%
  select(id, to_lon, to_lat) %>%
# Next, let's grab the ending coordinates of the move
  # and register these as the geography of the point
  st_as_sf(coords = c("to_lon", "to_lat")) %>%
  # Set initial projection as World Map,
  st_set_crs(4326) %>%
  # And then reformat to Albers Equal Area Projection for the Continguous US
  st_transform(CRS(pr)) %>%
  # Last, we join the traits of the polygons from jp to the points that lie within them
  st_join(shape) %>%
  as.data.frame() %>% select(-geometry) %>%
  # Narrow down to just distinct Facebook id-muni_code pairs
  distinct() %>%
  # Export geocoded points to file
  write_csv("temp/to_points.csv")

##############################################################
# Aggregate baseline and crisis time movements for each date-time
##############################################################

read_csv("temp/temp.csv") %>%
  select(id, datetime, from_name, to_name, n_baseline, n_crisis) %>%
  # Join in correct codes
  left_join(by = "id",
            y = read_csv("temp/from_points.csv") %>%
              rename(from_code = code)) %>%
  left_join(by = "id",
            y = read_csv("temp/to_points.csv") %>%
              rename(to_code = code)) %>%
  # Now aggregate to the municipal level
  group_by(from_code, to_code, datetime) %>%
  summarize(n_baseline = sum(n_baseline, na.rm = TRUE),
            n_crisis = sum(n_crisis, na.rm = TRUE)) %>%
  # Export to file
  write_csv("aggregate/earthquake_pr.csv")
```


# 2. Get Network Statistics

First, we write our lowest-level function, which takes an igraph object and gets network statistics from it.




```{r}

get_stats = function(g){
  
  # How many edges are in this temporal slice of the network?
  n = g %>% igraph::ecount()

  if(n > 1){

  ###############################
  ## Get Measures of Connectivity
  ###############################

    # Average Weighted Degree
  # average weighted total degree across all nodes in the network
  meanwdeg = g %>%
    igraph::strength(mode = "total", loops = TRUE, weights = igraph::E(g)$evacuation) %>%
    mean(na.rm = TRUE)
  # Average Weighted In-Degree
  # average weighted in-degree across all nodes in the network
  meanwideg = g %>%
    igraph::strength(mode = "in", loops = TRUE, weight = igraph::E(g)$evacuation) %>%
    mean(na.rm = TRUE)
  
  # Giant Component Percent (percentage of nodes in the largest component of the network)
  giant_comp = CINNA::giant_component_extract(g, directed = TRUE, num_proj = 1)[[1]] %>%
    igraph::vcount() / igraph::vcount(g)
  
  # Global Clustering Coefficient - measure of high triadic closure vs. tree-like
  trans_global = g %>% igraph::transitivity(type = "global")
  
  # Edge Density
  density = g %>% igraph::edge_density(loops = FALSE)
  
  
  #################################
  ## Get Measures of Heterogeneity
  #################################
  
  # Standard Deviation of Degree
  sdwdeg = g %>% igraph::strength(mode = "total", loops = TRUE,
                                  weights = igraph::E(g)$evacuation) %>%
    mean(na.rm = TRUE)
  
  # Standard Deviation of Indegree
  sdwideg = g %>% igraph::strength(mode = "in", loops = TRUE,
                                   weights = igraph::E(g)$evacuation) %>%
    mean(na.rm = TRUE)
  
  
  # Mean Diversity (Mean of Shannon's Entropy for each Vertex)
  # How homogeneous (high) or heterogenous (low) the degree distribution is
  meandiversity = data.frame(
    # Calculate diversity via Shannon's Entropy for every vertex, using edge weights
    diversity = igraph::diversity(g, weights = igraph::E(g)$evacuation)) %>%
    # Filter out NAs and Infinite Value responses
    filter(!is.na(diversity) & !diversity %in% c(Inf, -Inf)) %>% 
    # Take the mean using the total number of nodes in the network as the denominator.
    sum() / igraph::vcount(g)
  
  
  
  # Disassortativity by Weighted In-degree
  # Negative Pearson's correlation coefficient for Indegree
  # Do popular places SEND people to popular destinations? (ASSORTATIVITY) (Lower Values)
  # OR, do popular places SEND people to unpopular destinations? (DISASSORTATIVITY) (Higher Values)
  disassortativity = g %>% 
    igraph::assortativity(
      directed = TRUE, 
      types1 = igraph::strength(g, mode = "in", loops = FALSE,
                                weights = igraph::E(g)$evacuation)) * -1
  
  # Return a data.frame containing the following data
  data.frame(
    meanwdeg,
    meanwideg, 
    giant_comp,
    trans_global,
    density,
    sdwideg, 
    meandiversity, 
    disassortativity
  ) %>%
    return()
  
  }
  
  # On the other hand, if there are no edges, 
  # please return a data.frame with these values set to zero.
  #if(n <= 0){
    
      # Return a data.frame containing the following data
  #data.frame(
  #  meanwdeg = 0,
  #  meanwideg = 0, 
  #  giant_comp = 0,
  #  trans_global = 0,
  #  density = 0,
  #  sdwideg = 0, 
  #  meandiversity = 0, 
  #  disassortativity = 0
  #) %>%
  #  return()
  
#  }
}




```


Next, we write a function for one level up, to produce the igraph object that we use to get network statistics.

```{r}
get_network = function(edges){

  edges %>%
# We estimate evacuation as the excess user movement greater than what you would expect
    # during baseline conditions
    mutate(evacuation = if_else(n_crisis - n_baseline > 0, n_crisis - n_baseline, 0)) %>%
    select(from_code, to_code, evacuation) %>%
    # Since evacuation can only be positive, filter to positive cases to conserve file space
    filter(evacuation > 0) %>%
    # Get rid of any NA cases
    filter(!is.na(from_code) & !is.na(to_code)) %>%
    
    # Now convert edgelist into a network
    igraph::graph_from_data_frame(
      directed = TRUE, 
      # Build network from the following vertices
      vertices = read_csv("nodes/temp.csv")) %>%
    return()
}
```

```{r}
get_subset = function(increment){
  # Get edgelist
  result <- read_csv("aggregate/temp.csv") %>%
    filter(datetime == increment) %>%
    get_network() %>%
    get_stats()
  
  if(!is.null(result)){
    result %>%
      mutate(datetime = increment) %>%
      return()
  }
}
```


```{r}

get_results = function(file){
  
  # Gather the appropriate nodelist
  if(file %>% str_detect(pattern = "japan|chiba")){
    readRDS(paste("nodes/", file, sep = "")) %>%
      as.data.frame() %>%
      select(muni_code) %>%
      filter(!is.na(muni_code)) %>%
      unique() %>%
      write_csv("nodes/temp.csv")
  }
  
  if(file %>% str_detect(pattern = "jackson|nashville|florida|earthquake_pr")){
    readRDS(paste("nodes/", file, sep = "")) %>%
      as.data.frame() %>%
      select(code) %>%
      filter(!is.na(code)) %>%
      unique() %>%
      write_csv("nodes/temp.csv")
  }
  
  if(file %>% str_detect(pattern = "cave_fire|hillside|getty|shutoff")){
    readRDS("nodes/california.csv") %>%
      as.data.frame() %>%
      select(code) %>%
      filter(!is.na(code)) %>%
      unique() %>%
      write_csv("nodes/temp.csv")
  }
  
  
  
  # Get the date-and-time range for your edgelist 
  time <- read_csv(paste("aggregate/", file, sep = "")) %>%
    mutate(evacuation = if_else(n_crisis - n_baseline > 0, n_crisis - n_baseline, 0)) %>%
    filter(evacuation > 0) %>%
    select(datetime) %>%
    unique()
  
  
  # Save the edgelist as a specific dataset with the name temp.csv
  read_csv(paste("aggregate/", file, sep = "")) %>%
    write_csv("aggregate/temp.csv")
  
  # Run the analysis on each temporal subset of the data
  time$datetime %>%
    lapply(get_subset) %>%
    bind_rows() %>%
    mutate(event = file) %>%
    return()
  
}

``` 

```{r, message = FALSE, warning = FALSE}

aggregated_files <- dir("aggregate") %>% 
  as.data.frame() %>% 
  filter(. != "temp.csv") %>%
  unlist() %>% as.character() 

c("flood_chiba.csv",
  "japan_hagibis.csv", 
  "flood_jackson.csv", "tornado_nashville.csv",
#  "california_shutoff.csv", 
  "southern_california_shutoff.csv", # Looks like this one is better
  "earthquake_pr.csv",
  "getty_fire.csv",
  "hillside_fire_san_bernardino.csv",
  "cave_fire_santa_barbara.csv"
) %>%
  lapply(get_results) %>%
  bind_rows() %>%
  bind_rows(
    "hurricane_dorian_florida.csv" %>%
      get_results()
  ) %>%
  write_csv("network_stats.csv")

remove(get_network, get_results, get_stats, get_subset)
```


# 3. Analyze

Finally, we analyze this data. There are several main ways we can analyze these statistics. 

First, we can do absolute comparisons. This directly compares network statistics across different networks, including different sizes of networks and types. However, this is problematic, because many network statistics are shaped by the size of the network. Since I chose the size of the network, by aggregating to certain the sub-county divisions in certain states, this becomes a subjective threshold that shapes measures like density.

Second, we can do smaller comparisons. For example, we can compare evacuation *within* regions, like Japan or California, and we can compare evacuation *within cases* but *across time*. 

Third, we can transform the whole set of statistics into z-scores, turning rescaling all the statistics for a given event. This allows us to compare different times and patterns regardless of whether the evacuation event was big or small.


```{r, message = FALSE, warning = FALSE}
# Format data
read_csv("network_stats.csv") %>%
  group_by(event) %>%
  arrange(datetime) %>%
  mutate(time = 1:n()) %>% ungroup() %>%
  # Remove California Shutoff, if present;
  # It is an early duplicate of the southern california shutoff
  filter(event != "california_shutoff.csv") %>%
  # recode names
  mutate(name = event %>% recode_factor(
    "southern_california_shutoff.csv" = "Southern California Shutoff",
    "cave_fire_santa_barbara.csv" = "Cave Fire in Santa Barbara",
    "earthquake_pr.csv" = "Earthquake in Southern Puerto Rice",
    "flood_chiba.csv" = "Flood in Chiba, Japan",
    "flood_jackson.csv" = "Flood in Jackson, Mississippi",
    "getty_fire.csv" = "Getty Fire",
    "hillside_fire_san_bernardino.csv" = "Hillside Fire in San Bernardino",
    "hurricane_dorian_florida.csv" = "Hurricane Dorian in Florida",
    "japan_hagibis.csv" = "Typhoon Hagibis in Japan",
    "tornado_nashville.csv" = "Tornado in Nashville, Tennessee")) %>%
  mutate(type = event %>% recode_factor(
    "hurricane_dorian_florida.csv" = "Storm",
    "japan_hagibis.csv" = "Storm",
    "tornado_nashville.csv" = "Tornado",
    "flood_chiba.csv" = "Flood",
    "flood_jackson.csv" = "Flood",
    "getty_fire.csv" = "Fire",
    "hillside_fire_san_bernardino.csv" = "Fire",
    "cave_fire_santa_barbara.csv" = "Fire",
    "earthquake_pr.csv" = "Earthquake",
    "southern_california_shutoff.csv" = "Power Outage")) %>%
  mutate(area = event %>%  recode_factor(
    "hurricane_dorian_florida.csv" = "South",
    "flood_jackson.csv" = "South",
    "tornado_nashville.csv" = "South",
    "getty_fire.csv" = "California",
    "hillside_fire_san_bernardino.csv" = "California",
    "cave_fire_santa_barbara.csv" = "California",
    "southern_california_shutoff.csv" = "California",
    "earthquake_pr.csv" = "Puerto Rico",
    "japan_hagibis.csv" = "Japan",
    "flood_chiba.csv" = "Japan")) %>%
    mutate(area2 = area %>%  recode_factor(
      "South" = "US",
      "California" = "US",
      "Puerto Rico" = "US",
      "Japan" = "Japan")) %>%
  mutate(highlight = if_else(name == "Hurricane Dorian in Florida", 
                             "Hurricane Dorian", "Other Disaster")) %>%
  mutate(hour = datetime %>% word(2) %>% str_sub(1,2)) %>%
  write_csv("network_stats_label.csv")
```

```{r, message = FALSE, warning = FALSE}
library(tidyverse)
dat <- read_csv("network_stats_label.csv")  %>%

  # To compensate for the fact that mean diversity is INVERSELY related to dispersion,
  # we rescale it here to a scale from 0-1, and then subtract that from 1
  # Now, meandiversity is POSITIVELY related to dispersion and easier to visualize
  mutate(meandiversity = 1 - meandiversity %>% scales::rescale(to = c(0,1))) %>%

  select(-event) %>%
  pivot_longer(
    cols = -c(datetime, name, type, highlight, area, area2, time, hour),
    names_to = "measure",
    values_to = "value") %>%
  mutate(measure_name = measure %>% recode_factor(
    "meanwideg" = "Mean Weighted \n In-Degree",
    "meanwdeg" = "Mean Weighted \n Degree",
    "giant_comp" = "Giant Component \n Percentage",
    "trans_global" = "Global Clustering \n Coefficient",
    "density" = "Density",
    "sdwideg" = "Standard Deviation \n of Weighted In-Degree",
    "meandiversity" = "Mean \n Diversity",
    "disassortativity" = "Disassortativity")) %>%
  mutate(group = measure %>% recode_factor(
    "meanwideg" = "Clustering",
    "meanwdeg" = "Clustering",
    "giant_comp" = "Clustering",
    "trans_global" = "Clustering",
    "density" = "Clustering",
    "sdwideg" = "Dispersion",
    "meandiversity" = "Dispersion",
    "disassortativity" = "Dispersion")) 
```

```{r}
# Count number of networks
dat %>%
  select(time, name) %>% 
  distinct() %>%
  group_by(name) %>%
  summarize(networks = n())

```


## 3.1 Mean Weighted Indegree Over Time

```{r}
# Visualize
dat %>%
  filter(measure == "meanwideg", time < 50) %>%
  ggplot(mapping = aes(x = time, y = value, color = name, group = name, linetype = highlight)) +
  geom_line() +
  theme_minimal() +
  viridis::scale_color_viridis(discrete = TRUE, option = "plasma", end = 0.90) +
  labs(x = "Time (in 8-hour periods)", y = "Average Evacuees \n (Mean Weighted In-Degree)",
       color = "Disaster", linetype = "Type")
```


## 3.2 Beta Coefficient Chart


```{r}
# Let's make a model where each network statistic is an outcome,
# comparing the effect of the US vs. Japan,
# comparing the effect of fires, flods, power outages, storms, tornados vs. earthquakes
# comparing the effect of time,
# and comparing the effect of what time of day the evacuation measures came from

robust = function(model){
  model %>%
    lmtest::coeftest(vcov = sandwich::vcovHC(model)) %>%
    broom::tidy() %>%
    rename(std_error = std.error,
           p_value = p.value) %>%
    return()
}


model_each <- function(data){
  dat %>%
    # Filter to specific network statistic
    filter(measure == data) %>%
    # Set earthquakes to be the baseline, since it is the most unusual,
    # and we really want to know the effect of disasters like storms on evacuation
    mutate(type = type %>% factor %>% relevel(ref = "Earthquake")) %>%
    # Run model
    lm(formula = value ~ area2 + type + time + hour) %>%
      # Convert to coefficient table, with robust standard errors
    robust() %>%
    # Specify the outcome
    mutate(outcome = data) %>% 
    
     # Recode names
  mutate(term = term %>% recode_factor(
    "(Intercept)" = "Constant",
    "area2US" = "US",
    "typeFire" = "Fire",
    "typeFlood" = "Flood",
    "typePower Outage" = "Power Outage",
    "typeStorm" = "Storm",
    "typeTornado" = "Tornado",
    "typeEarthquake" = "Earthquake",
    "time" = "Time",
    "hour08" = "8 hours",
    "hour16" = "16 hours")) %>%
  mutate(name = outcome %>% recode_factor(
    "meanwideg" = "Mean Weighted \n In-Degree",
    "meanwdeg" = "Mean Weighted \n Degree",
    "giant_comp" = "Giant Component \n Percentage",
    "trans_global" = "Global Clustering \n Coefficient",
    "density" = "Density",
    "sdwideg" = "Standard Deviation \n of Weighted In-Degree",
    "meandiversity" = "Mean \n Diversity",
    "disassortativity" = "Disassortativity")) %>%
  mutate(group = outcome %>% recode_factor(
    "meanwideg" = "Clustering",
    "meanwdeg" = "Clustering",
    "giant_comp" = "Clustering",
    "trans_global" = "Clustering",
    "density" = "Clustering",
    "sdwideg" = "Dispersion",
    "meandiversity" = "Dispersion",
    "disassortativity" = "Dispersion")) %>%

    return()
}

# Get coefficients
unique(dat$measure) %>% 
  lapply(model_each) %>%
  bind_rows() %>%
  # Filter to show just these disaster variables
  filter(term %in% c("Fire", "Flood", "Storm", "Tornado", "Power Outage")) %>%
  # Make any variable with a p-value NOT below 0.05 show up as white
  mutate(statistic = if_else(p_value >= 0.05, NA_real_, statistic)) %>%
  # Map aesthetics
  ggplot(mapping = aes(x = term, y = name, fill = statistic, label = estimate %>% round(2))) +
  # Make a tile plot based on the t-statistics and label based on beta coefficient
  geom_tile() +
  geom_text() +
  # Fill the tiles from red to white to blue,
  # filling any missing values we made above for p > 0.05 with white, 
  # to signify no statistically significant effect.
  scale_fill_gradient2(midpoint = 0, low = "orange", mid = "white", high = "steelblue", space = "lab", na.value = "white") +
  # make a fancy think colorbar
  guides(fill = guide_colourbar(barwidth = 0.5, barheight = 10)) +
  # Split into a grid
  facet_grid(group ~ ., space = "free_x", scales = "free_y", switch = "y") +
  # Make cool grouping labs for our outcomes
  theme_bw() +
  theme(strip.placement = "outside",
      strip.background = element_rect(fill=NA,colour=NA),
      panel.spacing=unit(0.25,"cm"), 
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks = element_blank()) +
  # center plot title and caption
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5)) +
  # Label plot
  labs(
    title = "Effect of Disaster Type on Evacuation Networks",
    x = "Disaster Type", y = "", color = "t-Statistic",
    caption = c("Tiles are colored by the size of their standardized test statistic. Labels reflect the beta coefficient of each statistic. \n Tiles in white are not statistically significant."))
# panel.spacing = unit(2, "lines")
```

This figure represents the effect of disaster type on the structure of evacuation networks, looking at the effect of fires, floods, power outages, storms, and tornados. To produce this visual, I regressed the effect of six types of disaster against eight different outcomes, statistics summarizing the amount of clustering or dispersion in that network. Each model also controlled for whether the event occurred in the US or Japan, as well as the effect of time, using a numeric counter, as well as time of day, with two dummy variables to control for the three different eight-hour periods for which evacuation data was collected each day. In this case, earthquakes serve as the baseline category; all estimated effects shown in the chart represent the change in evacuation network statistics compared to earthquakes. In this chart, each row represents a model of a specific evacuation network outcome, showing just the beta coefficients for each disaster type. Tile are colored by the size of their standardized test statistic. All tiles significant at less than the p < 0.05 level were automatically colored white, whereas statistics greater that +/- 1.96 (p < 0.05) were colored blue if positive and red if negative. Finally, each tile is labelled by their beta coefficient, so that we can compare the size of coefficients across disaster types.

Four main findings can be derived from this analysis. 

First, storms cause more movement between cities on average (37.6 persons) than other disaster types (4.8-18.3 persons), judging from the scores for mean weighted degree and mean weighted indegree, although it also varies much more than other disasters (with a standarrd deviation of 18.8).

Second, our analysis shows that it is possible to have *both* high clustering and high diversity in an evacuation network, and storms and power outages both demonstrate this trend. These disasters, including Hurricane Dorian, Typhoon Hagibis, and California's Power Outages were all-encompassing, triggering both highly clustered pockets of evacuation as well as evacuation dispersed throughout the region. This is especially demonstrated by the Giant Component Percentage, which shows the percentage of sites that were in the largest cluster in the network.

Third, some disasters tend to be much sparser. Floods and tornados do not lead to much evacuation, possibly because they strike quickly. They tend to be less dissasortative and diverse, and weakly centrally clustered. Fires share clustered outcome, but they also provoke high amounts of movement, leading to higher standard deviations of movement.

Fourth, the earthquake in Puerto Rico did not lead to considerable amounts of evacuation, compared to these other disaster types. We can see that based on the sheer number of positive effects of other disaster types on evacuation network traits.


```{r, results=TRUE, echo = FALSE}


table_each <- function(data){
  require(dplyr)
  dat %>%
    # Filter to specific network statistic
    filter(measure == data) %>%
    # Set earthquakes to be the baseline, since it is the most unusual,
    # and we really want to know the effect of disasters like storms on evacuation
    mutate(type = type %>% factor %>% relevel(ref = "Earthquake")) %>%
      # Run model
    lm(formula = value ~ area2 + type + time + hour) %>%
    return()
}

texreg::htmlreg(
  l = unique(dat$measure) %>% 
    lapply(table_each),
  file = "table_1.html",
  bold = 0.05,
  digits = 2, 
  # Write new custom names
  custom.model.names = c(
    "meanwdeg" = "Mean Weighted Degree",
    "meanwideg" = "Mean Weighted Indegree",
    "giant_comp" = "Giant Component Percentage",
    "trans_global" = "Clustering Coefficient",
    "density" = "Density",
    "swdideg" = "Std. Dev. of Weighted Indegree",
    "meandiversity" = "Inverse Mean Diversity",
    "dissassortativity" = "Dissassortativity"), 
  # Write new custom variable names
  custom.coef.map = list(
    "typeFire" = "Fire",
    "typeFlood" = "Flood",
    "typeStorm" = "Storm",
    "typeTornado" = "Tornado",
    "typePower Outage" = "Power Outage",
    "area2US" = "US",
    "time" = "Time Passed",
    "hour08" = "Day Time",
    "hour16" = "Night Time",
    "(Intercept)" = "Constant"
  ),
  # Replace with robust standard errors
  override.se = unique(dat$measure) %>% 
    lapply(table_each) %>%
    lapply(robust) %>%
    lapply(function(data){data %>% select(std_error) %>% unlist() %>% return()}),
  # Replace with p-value made via robust standard errors
  override.pvalues = unique(dat$measure) %>% 
    lapply(table_each) %>%
    lapply(robust) %>%
    lapply(function(data){data %>% select(p_value) %>% unlist() %>% return()}), 
  custom.note = "Statistically significant effects depicted by *** at the p < 0.001 level, ** at the p < 0.01 level, and * at the p < 0.05 level."
)
 
#unique(dat$measure) %>% 
#    lapply(table_each) %>%
#    lapply(vif) 
```






```{r}
summary_each <- function(data){
  
  require(dplyr)
  require(broom)
  
  dat %>%
    # Filter to specific network statistic
    filter(measure == data) %>%
    # Set Earthquakes to be the baseline, 
    # and we really want to know the effect of disasters like storms on evacuation
    mutate(type = type %>% factor %>% relevel(ref = "Earthquake")) %>%
    # Run model
    lm(formula = value ~ area2 + type + time + hour) %>%
    # Get R2
    broom::glance() %>%
        # Specify the outcome
    mutate(outcome = data) %>% 
    # Recode outcome
     mutate(name = outcome %>% recode_factor(
    "meanwideg" = "Mean Weighted \n In-Degree",
    "meanwdeg" = "Mean Weighted \n Degree",
    "giant_comp" = "Giant Component \n Percentage",
    "trans_global" = "Global Clustering \n Coefficient",
    "density" = "Density",
    "sdwideg" = "Standard Deviation \n of Weighted In-Degree",
    "meandiversity" = "Mean \n Diversity",
    "disassortativity" = "Disassortativity")) %>%
  mutate(group = outcome %>% recode_factor(
    "meanwideg" = "Clustering",
    "meanwdeg" = "Clustering",
    "giant_comp" = "Clustering",
    "trans_global" = "Clustering",
    "density" = "Clustering",
    "sdwideg" = "Dispersion",
    "meandiversity" = "Dispersion",
    "disassortativity" = "Dispersion")) %>%
    return()
}


unique(dat$measure) %>%
  lapply(summary_each) %>%
  bind_rows() %>%
  ggplot(mapping  = aes(x = reorder(name, -r.squared), y = r.squared, fill = r.squared)) +
  geom_col() +
  coord_flip() +
  viridis::scale_fill_viridis(option = "plasma", begin = 0.9, end = 0.5) +
  facet_grid(group ~ ., space = "free_x", scales = "free_y", switch = "y") +
  theme_minimal() +
  # center plot title and caption
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5)) +
  # Make cool grouping labs for our outcomes
  theme(
    strip.placement = "outside",
      strip.background = element_rect(fill=NA,colour=NA),
      panel.spacing=unit(0,"cm"), axis.title.y = element_blank()) +
   # make a fancy think colorbar
  guides(fill = guide_colourbar(barwidth = 0.5, barheight = 10)) +

  labs(x = "Network Statistic", y = "(%) Variation Explained (R-squared)", fill = "R-squared",
       title = "Model Fit by Outcome")
```


## 3.3 Interaction Model (Not Good)

```{r, fig.width= 8}
model_interaction <- function(data){
  dat %>%
    # Filter to specific network statistic
    filter(measure == data) %>%
    # Set power outages to be the baseline, since it is the most unusual,
    # and we really want to know the effect of disasters like storms on evacuation
    mutate(type = type %>% factor %>% relevel(ref = "Earthquake")) %>%
    # Run model
    lm(formula = value ~ area2 + type*time + hour) %>%
    # Convert to coefficient table
    robust() %>%
    # Specify the outcome
    mutate(outcome = data) %>%
  mutate(interaction = if_else(term %>% str_detect(":time"), 
                               "Effect over Time", "Main Effect") %>%
           factor(levels = c("Main Effect", "Effect over Time"))) %>%
     # Recode names
  mutate(term = term %>% recode_factor(
    "(Intercept)" = "Constant",
    "area2US" = "US",
    "typeFire" = "Fire",
    "typeFlood" = "Flood",
    "typePower Outage" = "Power \n Outage",
    "typeStorm" = "Storm",
    "typeTornado" = "Tornado",
    "typeEarthquake" = "Earthquake",
    
    "typeFire:time" = "Fire",
    "typeFlood:time" = "Flood",
    "typePower Outage:time" = "Power \n Outage",
    "typeStorm:time" = "Storm",
    "typeTornado:time" = "Tornado",
    
    "time" = "Time",
    "hour08" = "8 hours",
    "hour16" = "16 hours")) %>%

  mutate(name = outcome %>% recode_factor(
    "meanwideg" = "Mean Weighted \n In-Degree",
    "meanwdeg" = "Mean Weighted \n Degree",
    "giant_comp" = "Giant Component \n Percentage",
    "trans_global" = "Global Clustering \n Coefficient",
    "density" = "Density",
    "sdwideg" = "Standard Deviation \n of Weighted In-Degree",
    "meandiversity" = "Mean \n Diversity",
    "disassortativity" = "Disassortativity")) %>%
  mutate(group = outcome %>% recode_factor(
    "meanwideg" = "Clustering",
    "meanwdeg" = "Clustering",
    "giant_comp" = "Clustering",
    "trans_global" = "Clustering",
    "density" = "Clustering",
    "sdwideg" = "Dispersion",
    "meandiversity" = "Dispersion",
    "disassortativity" = "Dispersion")) %>%

    return()
}

# Get coefficients
unique(dat$measure) %>% 
  lapply(model_interaction) %>%
  bind_rows() %>%
  # Filter to show just these disaster variables
  filter(term %in% c("Fire", "Flood", "Storm", "Tornado", "Power \n Outage")) %>%
  # Make any variable with a p-value NOT below 0.05 show up as white
  mutate(statistic = if_else(p_value >= 0.05, NA_real_, statistic)) %>%
  # Map aesthetics
  ggplot(mapping = aes(x = term, y = name, fill = statistic, label = estimate %>% round(2))) +
  # Make a tile plot based on the t-statistics and label based on beta coefficient
  geom_tile() +
  geom_text() +
  # Fill the tiles from red to white to blue,
  # filling any missing values we made above for p > 0.05 with white, 
  # to signify no statistically significant effect.
  scale_fill_gradient2(midpoint = 0, low = "firebrick", mid = "white", high = "steelblue", space = "lab", na.value = "white") +
  # make a fancy think colorbar
  guides(fill = guide_colourbar(barwidth = 0.5, barheight = 10)) +
  # Split into a grid
  facet_grid(group ~ interaction, space = "free_x", scales = "free_y", switch = "y") +
  # Make cool grouping labs for our outcomes
  theme(strip.placement = "outside",
      strip.background = element_rect(fill=NA,colour=NA),
      panel.spacing=unit(0,"cm"), axis.title.y = element_blank()) +
  # center plot title and caption
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5)) +
  # Label plot
  labs(
    title = "Effect of Disaster Type on Evacuation Networks",
    x = "Disaster Type", y = "", color = "t-Statistic",
    caption = c("Tiles are colored by the size of their standardized test statistic. Labels reflect the beta coefficient of each statistic. \n Tiles in white are not statistically significant."))
```



## 3.4 Chart

```{r}

# I want to see how does a network statistic vary over time
# compared to for several disasters


dat %>%
  # Add a daily counter
  group_by(type, measure, hour) %>%
  mutate(date = 1:n()) %>% ungroup() %>%
  # Rescale the values for each network statistic measure
  # that way, even though each network statistic is on a different scale,
  group_by(measure) %>%
  mutate(value = scale(value)) %>% ungroup() %>%
# Now get the average rescaled network statistic for each disaster type per timestop
  group_by(time, type, group) %>%
  summarize(mean = mean(value, na.rm = TRUE))  %>% ungroup() %>%
  # Now, because Storms and Power Outages have the only interesting trends, highlight just them
  mutate(highlight = if_else(type %in% c("Storm", "Power Outage"), .90, 0.25)) %>%
  mutate(type = type %>% factor(levels = c("Tornado", "Storm", "Flood", 
                                           "Fire", "Power Outage", "Earthquake"))) %>%
# Map aesthetics
  ggplot(mapping = aes(x = time, y = mean, group = type, color = type, alpha = highlight)) +
  # Make a horizontal line at 0
   geom_hline(yintercept = 0, linetype = "dashed", color = "darkgrey") +
 
  geom_point() +
   geom_line(mapping = aes(alpha = highlight),
             size = 1.25,
             stat="smooth",
             method = "loess") +
#  geom_smooth(se = FALSE) +
  theme_bw() +
  facet_wrap(~group) +
  xlim(0, 50) +
  viridis::scale_color_viridis(discrete = "TRUE", option = "plasma", begin = .9, end = 0) +
  theme(
    plot.caption = element_text(hjust = 0.5),
    panel.spacing=unit(0.5,"cm"), 
    axis.ticks = element_blank(),
   # panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(),
   strip.background = element_rect(
     color="black", fill="white", size=0.5, linetype="solid")) +
  labs(x = "Time (in 8-hour periods)", 
       y = "Average Network Statistic (Z-score)",
       caption = "Loess-smoothed curve per disaster type. \n Storms and Power Outages highlighted due to large variation.",
       color = "Disaster") +
  guides(alpha = FALSE)

```


This chart visualizes how much network structure varies over time depending on the disaster. I rescaled all network statistics from 0 to 1, to make them comparable, and then calculated the average rescaled value for statistics that represent clustering and statistics that represent dispersion. We see that power outages and storms see much higher levels of clustering and dispersion than other disasters, while tornados, earthquakes, and fires see lower levels over time. On the whole, most disasters see constant levels of network structure, but storms and power outages see much greater variation in network structure. Power outages see declining structure, while storms see a gradual increase in *both* the clustering and dispersion of evacuation over the first 6 days, and then a gradual decline over the next eight days.


```{r}


dat %>%
  # Add a daily counter
  group_by(type, measure, hour) %>%
  mutate(date = 1:n()) %>% ungroup() %>%
  # Rescale the values for each network statistic measure
  # that way, even though each network statistic is on a different scale,
  group_by(measure) %>%
  mutate(value = scale(value)) %>% ungroup() %>%
# Now get the average rescaled network statistic for each disaster type per timestop
  group_by(type, group) %>%
  summarize(mean = mean(value, na.rm = TRUE),
            sd = sd(value, na.rm = TRUE))  %>%
  mutate(lower = mean - sd,
         upper = mean + sd) %>% ungroup() %>%
  mutate(type = type %>% 
           reorder(-mean)) %>%


  # Map aesthetics
  ggplot(mapping = aes(x = type, y = mean, 
                       ymin = lower,
                       ymax = upper,
                       color = type)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "darkgrey") +
  geom_point(size = 3) +
  geom_errorbar(size = 1.25) +
  theme_minimal() +
  coord_flip() +
  viridis::scale_color_viridis(discrete = TRUE, option = "plasma", begin = 0, end = .9) + 
  facet_wrap(~group) +
  guides(color = FALSE) +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5),
    panel.spacing = unit(0.5, "cm")) +
  labs(x = "", y= "Average Network Statistic (Z-score)", 
       caption = "Error bars depict standard deviation around mean statistic")


```





# 4. County Subdivision Index

Next, we need to create some county subdivision-level indices of social capital.


```{r, message = FALSE, warning = FALSE}
library(readr)
library(tidycensus)
library(tidyverse)
library(censusapi)
library(Amelia)
```



```{r, message = FALSE, warning = FALSE}
####################################American Community Survey (ACS)###################################
######################################################################################################
#ACS codebook: https://api.census.gov/data/2016/acs/acs5/variables.html


census_api_key("97b29053a89e5396ef806bf7f4fae05b7387fabd")

#acs17 <- load_variables(year = 2018, dataset = "acs5", cache = TRUE)

dir.create("index")
dir.create("index/temp")
```




## Race Similarity 


```{r}
census_api_key("97b29053a89e5396ef806bf7f4fae05b7387fabd")
```


```{r}

race = function(stateabb){

# Metdata/source: https://api.census.gov/data/2016/acs/acs5/groups/B02001.html
  
  #Bonding: Race similarity Race Fractionalization (0 = complete homogeneity to 1 = complete heterogeneity)
  race_similarity_vars <- c("B01003_001E", #TOTAL POPULATION
                            "B02001_002E", #Race similarity: Estimate!!Total!!White alone
                            "B02001_003E", #Race similarity: Estimate!!Total!!Black or African American alone
                            "B02001_004E", #Race similarity: Estimate!!Total!!American Indian and Alaska Native alone
                            "B02001_005E", #Race similarity: Estimate!!Total!!Asian alone
                            "B02001_006E", #Race similarity: Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone
                            "B02001_007E", #Race similarity: Estimate!!Total!!Some other race alone
                            "B02001_008E" #Race similarity: Estimate!!Total!!Two or more races
  )
  
  #  Pull data down from Census API
  get_acs(
    year = 2018,
    state = stateabb,
    geography = "county subdivision",
    variables = c(race_similarity_vars),
    survey = "acs5") %>%
    write_csv("index/race_similarity.csv")
  
  #Convert table to wide format and remove margin of error (moe)
  read_csv("index/race_similarity.csv") %>%
    select(-moe) %>%
    pivot_wider(names_from = variable,
                values_from = estimate) %>%
    #Calculate race similarity
    mutate(race_similarity = (1 - (
      (B02001_002 / B01003_001) ^ 2 +
        (B02001_003 / B01003_001) ^ 2 +
        (B02001_004 / B01003_001) ^ 2 +
        (B02001_005 / B01003_001) ^ 2 +
        (B02001_006 / B01003_001) ^ 2 +
        (B02001_007 / B01003_001) ^ 2 +
        (B02001_008 / B01003_001) ^ 2
    )))  %>%
    #Generate dataframe with key vars
    select(GEOID, NAME, race_similarity) %>%
    mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
    return()
}


# Gather data from each state
c(state.abb) %>%
  lapply(race) %>%
  bind_rows() %>%
  write_csv("index/temp/race_similarity.csv") 
```  

## Ethnicity similarity 
  
```{r}  
  
ethnicity = function(stateabb){
  
  # Metdata/source: https://api.census.gov/data/2018/acs/acs5/groups/B03001.html
  
  #Bonding: Ethnicity similarity Ethnicity Fractionalization (0 = complete homogeneity to 1 = complete heterogeneity)
  ethnicity_similarity_vars <- c("B01003_001E", #TOTAL POPULATION, 
                                 "B03001_002E", #Ethnicity similarity: Estimate!!Total!!Not Hispanic or Latino
                                 "B03001_003E" #Ethnicity similarity: Estimate!!Total!!Hispanic or Latino
  )
  
  #  Pull data down from Census API
  get_acs(
    year = 2018,
    state = stateabb,
    geography = "county subdivision",
    variables = c(ethnicity_similarity_vars),
    survey = "acs5"
  ) %>%
    write_csv("index/ethnicity_similarity.csv")
  
  #Convert table to wide format and remove margin of error (moe)
  read_csv("index/ethnicity_similarity.csv") %>%
    select(-moe) %>%
    pivot_wider(names_from = variable,
                values_from = estimate) %>%
    #Calculate ethnicity similarity
    mutate(ethnicity_similarity = (1 - (
      (B03001_002 / B01003_001) ^ 2 +
        (B03001_003 / B01003_001) ^ 2
    ))) %>%
    #Generate dataframe with key vars
    select(GEOID, NAME, ethnicity_similarity) %>%
    mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
    return()
}

# Gather data from each state
c(state.abb) %>%
  lapply(ethnicity) %>%
  bind_rows() %>%
  write_csv("index/temp/ethnicity_similarity.csv")

```
  

## Educational equality

```{r}  
education = function(stateabb){
  # Metdata/source: https://api.census.gov/data/2016/acs/acs5/subject/groups/S1501.html
  
  #Bonding: Educational equality Negative absolute difference between % of total population with college education and % of total population with less than high school education
  education_equality_vars <- c("S1501_C01_001E", #Total!!Estimate!!Population 18 to 24 years
                               "S1501_C01_002E", #Total!!Estimate!!Population 18 to 24 years!!Less than high school graduate
                               "S1501_C01_005E", #Total!!Estimate!!Population 18 to 24 years!!Bachelor's degree or higher
                               "S1501_C02_002E", #Percent!!Estimate!!Population 18 to 24 years!!Less than high school graduate
                               "S1501_C02_005E"	#Percent!!Estimate!!Population 18 to 24 years!!Bachelor's degree or higher
  )
  
  #  Pull data down from Census API
  get_acs(year = 2018, 
          geography = "county subdivision", 
          state = stateabb,
          variables = c(education_equality_vars), 
          survey = "acs5") %>% 
    write_csv("index/education_equality.csv")
  
  #Convert table to wide format and remove margin of error (moe)
  read_csv("index/education_equality.csv") %>%
    select(-moe) %>% 
    pivot_wider(names_from = variable, 
                values_from = estimate) %>%
    #Calculate ethnicity similarity
    # get absolute value of difference between college and high school educated persons
    # where a large gap indicates homophily and small gap indicates heterophily
    mutate(EE = abs(S1501_C02_005-S1501_C02_002)) %>% 
    # now subtract it from 1, where 1 now = heterophily and 0 = homophily
    mutate(education = 1-EE) %>% 
    #Generate dataframe with key vars
    select(GEOID, NAME, education) %>%
    mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
    return()
}

c(state.abb) %>%
  lapply(education) %>%
  bind_rows() %>%
  write_csv("index/temp/education.csv")

```

## Gini Index

```{r}
  
gini = function(stateabb){
  
  # Metdata/source: https://api.census.gov/data/2018/acs/acs5/groups/B19083.html
  
  #Bonding:  Race/income equality Gini coefficient (0 = perfect equality to 1 = perfect inequality)
  gini_var <- c("B19083_001E") #Income inequality: Estimate!!Gini Index)             
  
  
  #  Pull data down from Census API
  get_acs(
    year = 2018,
    geography = "county subdivision",
    state = stateabb,
    variables = c(gini_var),
    survey = "acs5") %>%
    write_csv("index/gini_index.csv")
  
  #Convert table to wide format and remove margin of error (moe)
  read_csv("index/gini_index.csv") %>%
    select(-moe) %>%
    pivot_wider(names_from = variable,
                values_from = estimate) %>%
    #Calculate gini var and generate dataframe with key var
    # Here, we're actually already calculating it into the right direction, where
    # 1 now equals perfect equality and 0 equals perfect inequality
    mutate(gini = 1 - B19083_001) %>%
    select(GEOID,
           NAME,
           gini) %>%
    mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
    return()
}

c(state.abb) %>%
  lapply(gini) %>%
  bind_rows() %>%
    write_csv("index/temp/gini_index.csv")
```


## Employment equality

```{r}  
  
employment = function(stateabb){
  # Metdata/source: https://api.census.gov/data/2015/acs/acs1/groups/B23025.html
  
  #Bonding: Employment equality Absolute difference between % of total employed and % of total unemployed labor force
  employment_equality_vars <- c("B23025_003E", #Employment equality: Estimate!!Total!!In labor force!!Civilian labor force
                                "B23025_004E", #Employment equality: Estimate!!Total!!In labor force!!Civilian labor force!!Employed
                                "B23025_005E" #Employment equality: Estimate!!Total!!In labor force!!Civilian labor force!!Unemployed
  )
  
  #  Pull data down from Census API
  get_acs(
    year = 2018,
    state = stateabb,
    geography = "county subdivision",
    variables = c(employment_equality_vars),
    survey = "acs5"
  ) %>%
    write_csv("index/employment.csv")
  
  #Convert table to wide format and remove margin of error (moe)
  read_csv("index/employment.csv") %>%
    select(-moe) %>%
    pivot_wider(names_from = variable,
                values_from = estimate) %>%
    #Calculate employment equality
    mutate(
      percent_employed = (B23025_004 / B23025_003),
      percent_unemployed = (B23025_005 / B23025_003),
      employment_equality_1 = abs(percent_employed - percent_unemployed)) %>%
    mutate(
      # Here, like with college,
      # 1 now = heterophily, while 0 = homophily.
      # This will need to eventually be rescaled
      employment_equality = 1 - employment_equality_1) %>%
    #Generate table with key vars
    select(GEOID,
           NAME,
           employment_equality) %>%
    mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
    return()
  
}

c(state.abb) %>%
  lapply(employment) %>%
  bind_rows() %>%
  write_csv("index/temp/employment.csv")
```



## Gender income similarity

```{r}

gender = function(stateabb){
  # Metdata/source: https://api.census.gov/data/2018/acs/acs5/subject/groups/S1903.html
  
  gender_income_similarity_vars <- c("S1903_C03_034E",	#Estimate!!Median income (dollars)!!NONFAMILY HOUSEHOLDS!!Nonfamily households
                                     "S1903_C03_035E",	#Estimate!!Median income (dollars)!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Female householder
                                     "S1903_C03_038E"	#Estimate!!Median income (dollars)!!NONFAMILY HOUSEHOLDS!!Nonfamily households!!Male householder
  )
  
  #  Pull data down from Census API
  get_acs(
    year = 2018,
    geography = "county subdivision",
    state = stateabb,
    variables = c(gender_income_similarity_vars),
    survey = "acs5"
  ) %>%
    write_csv("index/gender_income.csv")
  
  #After running summary, there were missing values in each variable. Values were imputated with the 'Hmisc' package using the mean of the variable.
  
  #S1903_C03_035 missing values 
  #Alpine County, California
  #Chattahoochee County, Georgia
  #Robertson County, Kentucky
  #Loup County, Nebraska
  #Hyde County, North Carolina
  #Kenedy County, Texas
  #Loving County, Texas
  #McMullen County, Texas
  #Daggett County, Utah
  
  #S1903_C03_034 missing values
  #Glasscock County, Texas
  #McMullen County, Texas
  #Daggett County, Utah
  
  # S1903_C03_038 missing values
  #Presidio County, Texas
  #Eureka County, Nevada
  #Elliott County, Kentucky
  #Emporia city, Virginia
  #Jim Hogg County, Texas
  #Banner County, Nebraska
  #Bacon County, Georgia
  #Zapata County, Texas
  #Mora County, New Mexico
  #Motley County, Texas
  #Schleicher County, Texas
  #Issaquena County, Mississippi
  #Custer County, Colorado
  #Kenedy County, Texas
  #Garfield County, Washington
  #Kimble County, Texas
  #Sterling County, Texas
  #Petroleum County, Montana
  #Loving County, Texas
  #Morgan County, Utah
  #Borden County, Texas
  #Glasscock County, Texas
  #McMullen County, Texas
  #Daggett County, Utah
  
  #Convert table to wide format and remove margin of error (moe)
  read_csv("index/gender_income.csv") %>%
    select(-moe) %>%
    pivot_wider(names_from = variable,
                values_from = estimate) %>%
    # If values are missing, impute them with the mean value from that variable
    mutate_at(vars(S1903_C03_034,S1903_C03_035, S1903_C03_038),
              funs(if_else(is.na(.), mean(., na.rm = TRUE), .))) %>%
    #Calculate gender income similarity
    mutate(
      TI = S1903_C03_038 + S1903_C03_035,
      MI = (S1903_C03_038 / TI) ^ 2,
      FI = (S1903_C03_035 / TI) ^ 2,
      GIS = (1 - (MI + FI)),
      gender_income_similarity = (GIS - (min(GIS))) / (max(GIS) - min(GIS))
    ) %>%
    #Generate a dataframe of key vars
    select(GEOID,
           NAME,
           gender_income_similarity) %>%
    mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
    return()
}

c(state.abb) %>%
  lapply(gender) %>%
  bind_rows() %>%
    write_csv("index/temp/gender_income_similarity.csv")
```


## Proficient English speakers

```{r}  
  
language = function(stateabb){
  # Metdata/source: https://api.census.gov/data/2015/acs/acs1/groups/B16004.html
  
  #Bonding: Language competency % of total population proficient English speakers 
  language_competency_vars <- c(
    "B16004_024E",  #Language competency Estimate!!Total!!18 to 64 years
    "B16004_025E",  #Language competency Estimate!!Total!!18 to 64 years!!Speak only English
    "B16004_027E",  #Language competency Estimate!!Total!!18 to 64 years!!Speak Spanish!!Speak English "very well"
    "B16004_032E",  #Language competency Estimate!!Total!!18 to 64 years!!Speak other Indo-European languages!!Speak English "very well"
    "B16004_037E",  #Language competency Estimate!!Total!!18 to 64 years!!Speak Asian and Pacific Island languages!!Speak English "very well"
    "B16004_042E"  #Language competency Estimate!!Total!!18 to 64 years!!Speak other languages!!Speak English "very well"
  )
  
  #  Pull data down from Census API 
  get_acs(
    year = 2018,
    geography = "county subdivision",
    state = stateabb,
    variables = c(language_competency_vars,
                  "GEO_ID",
                  "YEAR"),
    survey = "acs5"
  ) %>%
    write_csv("index/language_competency.csv")
  
  #Convert table to wide format and remove margin of error (moe)
  read_csv("index/language_competency.csv") %>%
    select(-moe) %>%
    pivot_wider(names_from = variable,
                values_from = estimate) %>%
    #Calculate language competency
    mutate(lang = ((B16004_025 + B16004_027 + B16004_032 + B16004_037 + B16004_042) /
                     B16004_024)) %>%
    #Generate dataframe with key vars
    select(GEOID, NAME, lang) %>%
    mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
    return()
  
}
  
c(state.abb) %>%
  lapply(language) %>%
  bind_rows() %>%
  write_csv("index/temp/language_competency.csv")
```


## Communication capacity

```{r}  

communication = function(stateabb){
  # Metdata/source:  https://api.census.gov/data/2015/acs/acs1/groups/B25043.html
  
  #Bonding: Communication capacity % of total households with a telephone
  communication_capacity_vars <- c("B25043_001E",  #Communication capacity: Estimate!!Total
                                   "B25043_003E",  #Communication capacity: Estimate!!Total!!Owner occupied!!With telephone service available
                                   "B25043_007E",  #Communication capacity: Estimate!!Total!!Owner occupied!!No telephone service available
                                   "B25043_012E",  #Communication capacity: Estimate!!Total!!Renter occupied!!With telephone service available
                                   "B25043_016E"  #Communication capacity: Estimate!!Total!!Renter occupied!!No telephone service available
  )
  
  #  Pull data down from Census API
  get_acs(
    year = 2018,
    state = stateabb,
    geography = "county subdivision",
    variables = c(communication_capacity_vars),
    survey = "acs5") %>%
    write_csv("index/communication_capacity.csv")
  
  #Convert table to wide format and remove margin of error (moe)
  read_csv("index/communication_capacity.csv") %>%
    select(-moe) %>%
    pivot_wider(names_from = variable,
                values_from = estimate) %>%
    #Calculate communication capacity
    mutate(telephone = ((B25043_003 + B25043_012) / B25043_001)) %>%
    #Generate table with key vars
    select(GEOID,
           NAME,
           telephone) %>%
    mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
    return()
}

c(state.abb) %>%
  lapply(communication) %>%
  bind_rows() %>%
  write_csv("index/temp/communication_capacity.csv")

``` 

  
## Non-elder population

```{r}  

elder = function(stateabb){
  # Metdata/source:  https://api.census.gov/data/2016/acs/acs5/subject/groups/S0101.html
  
  #Bonding: Non-elder population % of total population below 65 years of age
  nonelder_vars <- c("S0101_C01_001E", #Total!!Estimate!!Total population
                     "S0101_C01_002E", #Total!!Estimate!!AGE!!Under 5 years
                     "S0101_C01_003E", #Total!!Estimate!!AGE!!5 to 9 years
                     "S0101_C01_004E", #Total!!Estimate!!AGE!!10 to 14 years
                     "S0101_C01_005E", #Total!!Estimate!!AGE!!15 to 19 years
                     "S0101_C01_006E", #Total!!Estimate!!AGE!!20 to 24 years
                     "S0101_C01_007E", #Total!!Estimate!!AGE!!25 to 29 years
                     "S0101_C01_008E", #Total!!Estimate!!AGE!!30 to 34 years
                     "S0101_C01_009E", #Total!!Estimate!!AGE!!35 to 39 years
                     "S0101_C01_010E", #Total!!Estimate!!AGE!!40 to 44 years
                     "S0101_C01_011E", #Total!!Estimate!!AGE!!45 to 49 years
                     "S0101_C01_012E", #Total!!Estimate!!AGE!!50 to 54 years
                     "S0101_C01_013E", #Total!!Estimate!!AGE!!55 to 59 years
                     "S0101_C01_014E" #Total!!Estimate!!AGE!!60 to 64 years
  )
  
  #  Pull data down from Census API
  get_acs(
    year = 2018,
    state = stateabb,
    geography = "county subdivision",
    variables = c(nonelder_vars),
    survey = "acs5"
  ) %>%
    write_csv("index/nonelder.csv")
  
  #Convert table to wide format and remove margin of error (moe)
  read_csv("index/nonelder.csv") %>%
    select(-moe) %>%
    pivot_wider(names_from = variable,
                values_from = estimate) %>%
    # Calculate non-elder population
    mutate(non_elder = ((
      S0101_C01_002 + S0101_C01_003 + S0101_C01_004 + S0101_C01_005 + S0101_C01_006 +
        S0101_C01_007 + S0101_C01_008 + S0101_C01_009 + S0101_C01_010 + S0101_C01_011 +
        S0101_C01_012 + S0101_C01_013 + S0101_C01_014
    ) / S0101_C01_001
    )) %>%
    #Generate table with key vars
    select(GEOID,
           NAME,
           non_elder) %>%
        mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
    return()
  
}
  
c(state.abb) %>%
  lapply(elder) %>%
  bind_rows() %>%
    write_csv("index/temp/nonelder.csv")
```





## Local, state, and federal government workers

```{r}

govt = function(stateabb){
  # Metdata/source: https://api.census.gov/data/2015/acs/acs1/groups/B08128.html
  
  #Linking: Local, State, and Federal Government Linkage
  gov_workers_vars <- c("B08128_006E", #Local government linkage: Estimate!!Total!!Local government workers
                        "B08128_007E", #State government linkage: Estimate!!Total!!State government workers
                        "B08128_008E", #Federal government linkage: Estimate!!Total!!Federal government workers
                        "B08128_001E" #Local, state and federal government workers: Estimate!!Total
  )
  
  
  #  Pull data down from Census API
  get_acs(
    year = 2018,
    state = stateabb,
    geography = "county subdivision",
    variables = c(gov_workers_vars),
    survey = "acs5") %>%
    write_csv("index/gov_workers.csv")
  
  #Convert table to wide format and remove margin of error (moe)
  read_csv("index/gov_workers.csv") %>%
    select(-moe) %>%
    pivot_wider(names_from = variable,
                values_from = estimate) %>%
    # Calculate non-elder population
    mutate(local_gov = B08128_006/B08128_001,
           state_gov = B08128_007/B08128_001,
           fed_gov = B08128_008/B08128_001) %>%
    #Generate table with key vars
    select(GEOID,
           NAME,
           local_gov,
           state_gov,
           fed_gov) %>%
        mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
    return()
  
}
  
c(state.abb) %>%
  lapply(govt) %>%
  bind_rows() %>%
  write_csv("index/temp/gov_workers.csv")
```







## Community Business Patterns

```{r}
library(censusapi)
library(tidycensus)
library(tidyverse)
```


```{r}
# Get the population in 2016 for every zipcode tabulation area
get_acs(
  year = 2016,
  geography = "zcta",
  variables = c("B01003_001E"),
  survey = "acs5"
) %>%
  write_csv("index/population.csv")


##################################Community Business Patterns Survey##################################
######################################################################################################

#SoCI Variables: Bridging Social Capital: Religious organizations: Religious organizations per 10,000 persons,  Civic organizations: Civic organizations per 10,000 persons
#https://www2.census.gov/programs-surveys/cbp/technical-documentation/reference/naics-descriptions/naics2018.txt
#https://www.census.gov/data/developers/data-sets/cbp-nonemp-zbp.html 

#censusapi API key setup to query Community Business Patterns Survey
Sys.setenv(CENSUS_KEY="97b29053a89e5396ef806bf7f4fae05b7387fabd")
readRenviron("~/.Renviron")
Sys.getenv("CENSUS_KEY")

#Notes:
#2010 = NAICS2007
#2018 = NAICS2012

#To see a current table of every available endpoint, run listCensusApis:
# apis <- listCensusApis()




########################## CIVIC ORGS ##########################

# Civic organizations: Civic organizations per 10,000 persons: Community Business Patterns
#"8132//","Grantmaking and Giving Services"
#"8133//","Social Advocacy Organizations"
#"8134//","Civic and Social Organizations"
#"8139//","Business, Professional, Labor, Political, and Similar Organizations"

########################## 2016 ##########################
library(censusapi)
getCensus(name = "zbp",
          vintage = 2016,
          vars = c("ESTAB", "GEO_TTL", "GEO_ID", "YEAR"),
          region = "zipcode:*",
          naics2012 = 8132) %>%
  write_csv("index/giving.csv")

getCensus(name = "zbp", 
          vintage = 2016,
          vars = c("ESTAB", "GEO_ID","GEO_TTL","YEAR"),
          region = "zipcode:*",
          naics2012 = "8133") %>%
  write_csv("index/social_advocacy.csv")

getCensus(name = "zbp", 
          vintage = "2016",
          vars = c("ESTAB", "GEO_ID","GEO_TTL","YEAR"),
          region = "zipcode:*",
          naics2012 = "8134") %>%
  write_csv("index/civic_orgs.csv")

getCensus(name = "zbp", 
          vintage = "2016",
          vars = c("ESTAB", "GEO_ID","GEO_TTL","YEAR"),
          region = "zipcode:*",
          naics2012 = "8139") %>%
  write_csv("index/unions.csv")

########################## RELIGIOUS ORGS ##########################
# Religious organizations: Religious organizations per 10,000 persons: Community Business Patterns
#"8131//","Religious Organizations"
########################## 2016 ##########################

getCensus(name = "zbp", 
          vintage = "2016",
          vars = c("ESTAB", "GEO_ID","GEO_TTL","YEAR"),
          region = "zipcode:*",
          naics2012 = "8131") %>%
  write_csv("index/religious_orgs.csv")


# To convert between Zipcodes and 
# https://www.huduser.gov/portal/datasets/usps_crosswalk.html

# This calculates the estimated number of establishments in a census tract, spatially averaged from zipcodes.

# First, combine all zipcode measures into one dataframe
bind_rows(
  read_csv("index/giving.csv"),
  read_csv("index/social_advocacy.csv"),
  read_csv("index/civic_orgs.csv"),
  read_csv("index/unions.csv"),
  read_csv("index/religious_orgs.csv")
) %>%
  # Join in zipcode population
  left_join(by = c("zip_code" = "GEOID"),
            y = read_csv("index/population.csv") %>% 
              select(GEOID, pop = estimate)) %>%
  mutate(rate = ESTAB / pop * 10000) %>%
  select(zip_code, GEO_ID, GEO_TTL, YEAR, NAICS2012, rate) %>%
  # Join in tracts associated with those zipcodes
  # Using 2019 zipcode-county subdivision crosswalk
  # the first year that was made available
  #https://www.huduser.gov/portal/datasets/usps_crosswalk.html
  left_join(by = c("zip_code" = "zip"),
            y = read_csv("index/zipcsub_crosswalk.csv") %>% 
  select(zip, county_sub)) %>%
  # For each tract and topic,
  # Calculate the mean number of establishments
  group_by(county_sub, NAICS2012) %>%
  summarize(rate = mean(rate, na.rm = TRUE)) %>%
  # Now recode the variable numbers into names
  mutate(NAICS2012 = NAICS2012 %>% recode(
    "8131" = "religious_orgs",
    "8132" = "charitable_orgs",
    "8133" = "advocacy_orgs",
    "8134" = "civic_orgs",
    "8139" = "unions")) %>%
  # and pivot into a wider dataframe with many columns
  pivot_wider(
    id_cols = c(county_sub),
    names_from = NAICS2012,
    values_from = rate) %>%
   mutate(GEOID = county_sub %>% str_pad(width = 10, side = "left", pad = "0")) %>%
  ungroup() %>%
  select(-county_sub) %>%
  # and export for future use
  write_csv("index/temp/establishments.csv")
```



```{r}
# Finally, to get ESRI's data on fraternal ties and political acts, we just borrow it from the original index at the county level.
haven::read_dta("index/SoCI_09262019.dta") %>% head()
  select(geoid, countyname, fraternal, civic, charitable, union, politicalacts) %>%
  mutate(geoid = geoid %>% str_sub(-5, nchar(geoid))) %>%
  rename(GEOID = geoid) %>% select(-countyname) %>%
  write_csv("index/esri_county.csv")
```



## Voting age population

```{r, message = FALSE, warning = FALSE}
#NOT AVAILABLE ON API
library(tidyverse)
# Metdata/source: https://www.census.gov/programs-surveys/decennial-census/about/voting-rights/cvap.html
# https://www.census.gov/geographies/reference-files/time-series/geo/relationship-files.html

#Linking: Political linkage: % of total voting-age population who are eligible for voting
#TOT_EST: The rounded estimate of the total number of people for that geographic area and group.
#CVAP_EST: The rounded estimate of the total number of United States citizens 18 years of age or older for that geographic area and group.

read_csv("index/CVAP/Tract.csv") %>%
  # Filter into total voter traits
  filter(str_detect(lntitle, "Total")) %>%
  # Calculate voting age population
  # One major change from the county level index is that we don't have the total persons here;
  # Just the total US citizen population
  # So this becomes what share of US citizens can vote, not what share of total residents can vote
  mutate(
    voting_age = (cvap_est / cit_est)) %>%
  select(GEOID = geoid, NAME = geoname, voting_age) %>%
  mutate(GEOID = GEOID %>% str_sub(-11, nchar(GEOID))) %>%
  mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
  write_csv("index/temp/voting_age.csv")

```


I need to take each census tract and aggregate to county subdivision level.


```{r}
# Make a folder "points" to put census tract points in
#dir.create("points")

# Load packages
library(sf)
library(tidyverse)
library(rgdal)
library(readr)
library(dplyr)

# Get equal area conic projection
aea <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=23 +lon_0=-96 +x_0=0 +y_0=0 +ellps=clrk66 +units=m +no_defs" 

# Get census tracts as centroid points
get_centroids = function(data, proj = aes){
  require(tigris)
  require(dplyr)
  require(sf)
  tigris::tracts(state = data, cb = FALSE, year = 2018) %>%
  # Convert to sf object
  st_as_sf() %>%
  # Use AEA projection
  st_transform(st_crs(proj)) %>%
  # Get census tract centroids
  st_centroid() %>%
  # grab just the GEOID and point geometry
  select(geoid = GEOID, geometry) %>%
  return()
}

# Get census tracts for California, Arizona, and Nevada
"CA" %>% get_centroids() %>%
  saveRDS("points/CA.csv")
"NV" %>% get_centroids() %>%
  saveRDS("points/NV.csv")
"AZ" %>% get_centroids() %>%
  saveRDS("points/AZ.csv")

# Bind together
rbind(
  readRDS("points/CA.csv"),
  readRDS("points/NV.csv"),
  readRDS("points/AZ.csv")) %>%
  saveRDS("points/points_california.csv")

# Get centroids for Hurricane Dorian
"AL" %>% get_centroids() %>%
  saveRDS("points/AL.csv")
"FL" %>% get_centroids() %>%
  saveRDS("points/FL.csv")
"GA" %>% get_centroids() %>%
  saveRDS("points/GA.csv")

# Bind together
rbind(
  readRDS("points/AL.csv"),
  readRDS("points/FL.csv"),
  readRDS("points/GA.csv")) %>%
  saveRDS("points/points_hurricane_dorian.csv")  

# Get centroids for Tornado in Nashville
"TN" %>% get_centroids() %>%
  saveRDS("points/points_tornado_nashville.csv")


# Get centroids for Jackson Floods
"MS" %>% get_centroids() %>%
  saveRDS("points/MS.csv")
"AR" %>% get_centroids() %>%
  saveRDS("points/AR.csv")
"AL" %>% get_centroids() %>%
  saveRDS("points/AL.csv")
"LA" %>% get_centroids() %>%
  saveRDS("points/LA.csv")
# Bind together
rbind(
  readRDS("points/MS.csv"),
  readRDS("points/AR.csv"),
  readRDS("points/AL.csv"),
  readRDS("points/LA.csv")) %>%
  saveRDS("points/points_flood_jackson.csv")





pr <- "+proj=lcc +lat_1=18.43333333333333 +lat_2=18.03333333333333 +lat_0=17.83333333333333 +lon_0=-66.43333333333334 +x_0=200000 +y_0=200000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs" 

# Get Puerto Rico centroids
72 %>% get_centroids(proj = pr) %>%
  saveRDS("points/PR.csv")
# Get Virgin Islands
78 %>% get_centroids(proj = pr) %>%
  saveRDS("points/VI.csv")
# Bind together
rbind(
  readRDS("points/PR.csv"),
  readRDS("points/VI.csv")) %>%
  saveRDS("points/points_earthquake_puerto_rico.csv")
```



```{r}
# So, grab county subdivisions, stored in NODES folder
readRDS("nodes/california.csv") %>% 
    mutate(code = code %>% str_pad(width = 10, side = "left", pad = "0")) %>%
  rename(code_csub = code) %>%
  # Spatially join the traits of points lying within these polygons
  st_join(readRDS("points/points_california.csv") %>% 
            rename(code_tract = geoid)) %>%
  # Now left-join in census tract data like Voting Age
  left_join(by = "code_tract",
            y = read_csv("index/temp/voting_age.csv") %>% 
                          mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
              select(code_tract = GEOID, voting_age)) %>%
  # Now left-join in census tract data like zipcode business patterns info
  left_join(by = c("code_tract" = "GEOID"),
            y = read_csv("index/temp/establishments.csv") %>%
              mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0"))) %>%
  as.data.frame() %>%
  select(-geometry) %>%
  # Now, if NaN or Infinite, set all of these to NA so that they don't mess up the averaging
  mutate_at(vars(voting_age, religious_orgs, advocacy_orgs, civic_orgs, unions, charitable_orgs),
            funs(if_else(!is.na(.) & !is.infinite(.), ., NA_real_))) %>%
  # Get average voting age per county subdivision
  group_by(geoid = code_csub) %>%
  summarize(voting_age = mean(voting_age, na.rm = TRUE),
            religious_orgs = mean(religious_orgs, na.rm = TRUE),
            advocacy_orgs = mean(advocacy_orgs, na.rm = TRUE),
            civic_orgs = mean(civic_orgs, na.rm = TRUE),
            unions = mean(unions, na.rm = TRUE),
            charitable_orgs = mean(charitable_orgs, na.rm = TRUE)) %>%
  ungroup() %>% distinct() %>%
  write_csv("index/temp/voting_age_california.csv")

```




```{r}
# So, grab county subdivisions, stored in NODES folder
readRDS("nodes/earthquake_pr.csv") %>% 
    mutate(code = code %>% str_pad(width = 10, side = "left", pad = "0")) %>%
  rename(code_csub = code) %>%
  # Spatially join the traits of points lying within these polygons
  st_join(readRDS("points/points_earthquake_puerto_rico.csv") %>% 
            rename(code_tract = geoid)) %>%
  # Now left-join in census tract data like Voting Age
  left_join(by = "code_tract",
            y = read_csv("index/temp/voting_age.csv") %>% 
                          mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
              select(code_tract = GEOID, voting_age)) %>%
  # Now left-join in census tract data like zipcode business patterns info
  left_join(by = c("code_tract" = "GEOID"),
            y = read_csv("index/temp/establishments.csv") %>%
              mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0"))) %>%
  as.data.frame() %>%
  select(-geometry) %>%
  # Now, if NaN or Infinite, set all of these to NA so that they don't mess up the averaging
  mutate_at(vars(voting_age, religious_orgs, advocacy_orgs, civic_orgs, unions, charitable_orgs),
            funs(if_else(!is.na(.) & !is.infinite(.), ., NA_real_))) %>%
  # Get average voting age per county subdivision
  group_by(geoid = code_csub) %>%
  summarize(voting_age = mean(voting_age, na.rm = TRUE),
            religious_orgs = mean(religious_orgs, na.rm = TRUE),
            advocacy_orgs = mean(advocacy_orgs, na.rm = TRUE),
            civic_orgs = mean(civic_orgs, na.rm = TRUE),
            unions = mean(unions, na.rm = TRUE),
            charitable_orgs = mean(charitable_orgs, na.rm = TRUE)) %>%
  ungroup() %>% distinct() %>%
  write_csv("index/temp/voting_age_earthquake_puerto_rico.csv")
 
```



```{r}
# So, grab county subdivisions, stored in NODES folder
readRDS("nodes/flood_jackson.csv") %>% 
    mutate(code = code %>% str_pad(width = 10, side = "left", pad = "0")) %>%
  rename(code_csub = code) %>%
  # Spatially join the traits of points lying within these polygons
  st_join(readRDS("points/points_flood_jackson.csv") %>% 
            rename(code_tract = geoid)) %>%
  # Now left-join in census tract data like Voting Age
  left_join(by = "code_tract",
            y = read_csv("index/temp/voting_age.csv") %>% 
                          mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
              select(code_tract = GEOID, voting_age)) %>%
  # Now left-join in census tract data like zipcode business patterns info
  left_join(by = c("code_tract" = "GEOID"),
            y = read_csv("index/temp/establishments.csv") %>%
              mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0"))) %>%
  as.data.frame() %>%
  select(-geometry) %>%
  # Now, if NaN or Infinite, set all of these to NA so that they don't mess up the averaging
  mutate_at(vars(voting_age, religious_orgs, advocacy_orgs, civic_orgs, unions, charitable_orgs),
            funs(if_else(!is.na(.) & !is.infinite(.), ., NA_real_))) %>%
  # Get average voting age per county subdivision
  group_by(geoid = code_csub) %>%
  summarize(voting_age = mean(voting_age, na.rm = TRUE),
            religious_orgs = mean(religious_orgs, na.rm = TRUE),
            advocacy_orgs = mean(advocacy_orgs, na.rm = TRUE),
            civic_orgs = mean(civic_orgs, na.rm = TRUE),
            unions = mean(unions, na.rm = TRUE),
            charitable_orgs = mean(charitable_orgs, na.rm = TRUE)) %>%
  ungroup() %>% distinct() %>%
  write_csv("index/temp/voting_age_flood_jackson.csv")
 
```


```{r}
# So, grab county subdivisions, stored in NODES folder
readRDS("nodes/tornado_nashville.csv") %>% 
    mutate(code = code %>% str_pad(width = 10, side = "left", pad = "0")) %>%
  rename(code_csub = code) %>%
  # Spatially join the traits of points lying within these polygons
  st_join(readRDS("points/points_tornado_nashville.csv") %>% 
            rename(code_tract = geoid)) %>%
  # Now left-join in census tract data like Voting Age
  left_join(by = "code_tract",
            y = read_csv("index/temp/voting_age.csv") %>% 
                          mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
              select(code_tract = GEOID, voting_age)) %>%
  # Now left-join in census tract data like zipcode business patterns info
  left_join(by = c("code_tract" = "GEOID"),
            y = read_csv("index/temp/establishments.csv") %>%
              mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0"))) %>%
  as.data.frame() %>%
  select(-geometry) %>%
  # Now, if NaN or Infinite, set all of these to NA so that they don't mess up the averaging
  mutate_at(vars(voting_age, religious_orgs, advocacy_orgs, civic_orgs, unions, charitable_orgs),
            funs(if_else(!is.na(.) & !is.infinite(.), ., NA_real_))) %>%
  # Get average voting age per county subdivision
  group_by(geoid = code_csub) %>%
  summarize(voting_age = mean(voting_age, na.rm = TRUE),
            religious_orgs = mean(religious_orgs, na.rm = TRUE),
            advocacy_orgs = mean(advocacy_orgs, na.rm = TRUE),
            civic_orgs = mean(civic_orgs, na.rm = TRUE),
            unions = mean(unions, na.rm = TRUE),
            charitable_orgs = mean(charitable_orgs, na.rm = TRUE)) %>%
  ungroup() %>% distinct() %>%
  write_csv("index/temp/voting_age_tornado_nashville.csv")
 
```


```{r}
# So, grab county subdivisions, stored in NODES folder
readRDS("nodes/hurricane_dorian_florida.csv") %>% 
  mutate(code = code %>% str_pad(width = 10, side = "left", pad = "0")) %>%
  rename(code_csub = code) %>%
  # Spatially join the traits of points lying within these polygons
  st_join(readRDS("points/points_hurricane_dorian.csv") %>% 
            rename(code_tract = geoid)) %>%
  # Now left-join in census tract data like Voting Age
  left_join(by = "code_tract",
            y = read_csv("index/temp/voting_age.csv") %>% 
              mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0")) %>%
              select(code_tract = GEOID, voting_age)) %>%
  # Now left-join in census tract data like zipcode business patterns info
  left_join(by = c("code_tract" = "GEOID"),
            y = read_csv("index/temp/establishments.csv") %>%
              mutate(GEOID = GEOID %>% str_pad(width = 11, side = "left", pad = "0"))) %>%
  as.data.frame() %>%
  select(-geometry) %>%
  # Now, if NaN or Infinite, set all of these to NA so that they don't mess up the averaging
  mutate_at(vars(voting_age, religious_orgs, advocacy_orgs, civic_orgs, unions, charitable_orgs),
            funs(if_else(!is.na(.) & !is.infinite(.), ., NA_real_))) %>%
  # Get average voting age per county subdivision
  group_by(geoid = code_csub) %>%
  summarize(voting_age = mean(voting_age, na.rm = TRUE),
            religious_orgs = mean(religious_orgs, na.rm = TRUE),
            advocacy_orgs = mean(advocacy_orgs, na.rm = TRUE),
            civic_orgs = mean(civic_orgs, na.rm = TRUE),
            unions = mean(unions, na.rm = TRUE),
            charitable_orgs = mean(charitable_orgs, na.rm = TRUE)) %>%
  ungroup() %>% distinct() %>%
  write_csv("index/temp/voting_age_hurricane_dorian.csv")
read_csv("index/temp/voting_age_hurricane_dorian.csv") %>% head() 
```

```{r}

# Bind together our county subdivision estimates into a single file
bind_rows(
  read_csv("index/temp/voting_age_california.csv") %>%
    mutate(geoid = geoid %>% as.character),
  read_csv("index/temp/voting_age_earthquake_puerto_rico.csv") %>%
    mutate(geoid = geoid %>% as.character),
  read_csv("index/temp/voting_age_flood_jackson.csv") %>%
    mutate(geoid = geoid %>% as.character),
  read_csv("index/temp/voting_age_tornado_nashville.csv") %>%
    mutate(geoid = geoid %>% as.character),
  read_csv("index/temp/voting_age_hurricane_dorian.csv") %>%
    mutate(geoid = geoid %>% as.character)
) %>%
  write_csv("index/temp/voting_age_csub.csv")
```




## 4.2. Combine


```{r}

read_better = function(file){
  read_csv(file = file) %>%
    mutate(GEOID = GEOID %>% as.character %>% str_pad(width = 10, side = "left", pad = "0")) %>%
    mutate(GEOID = GEOID %>% str_sub(-10, nchar(GEOID))) %>%
    mutate(GEOID = GEOID %>% str_pad(width = 10, side = "left", pad = "0")) %>%
    return()
}

read_better("index/temp/nonelder.csv") %>%
  select(GEOID, NAME) %>%
  distinct() %>%
  left_join(y = read_better("index/temp/communication_capacity.csv") %>%
              select(-NAME),
            by = "GEOID") %>%
  left_join(y = read_better("index/temp/education.csv") %>%
              select(-NAME),
            by = "GEOID") %>%
  left_join(y = read_better("index/temp/ethnicity_similarity.csv") %>%
              select(-NAME),
            by = "GEOID") %>%
  left_join(y = read_better("index/temp/employment.csv") %>%
              select(-NAME),
            by = "GEOID") %>%
  left_join(y = read_better("index/temp/gender_income_similarity.csv") %>%
              select(-NAME),
            by = "GEOID") %>%
  left_join(y = read_better("index/temp/gini_index.csv") %>%
              select(-NAME),
            by = "GEOID") %>%
  left_join(y = read_better("index/temp/gov_workers.csv") %>%
              select(-NAME),
            by = "GEOID") %>%
  left_join(y = read_better("index/temp/language_competency.csv") %>%
              select(-NAME),
            by = "GEOID") %>%
  left_join(y = read_better("index/temp/nonelder.csv") %>%
              select(-NAME),
            by = "GEOID") %>%
  left_join(y = read_better("index/temp/race_similarity.csv") %>%
              select(-NAME),
            by = "GEOID") %>%
  left_join(y = read_csv("index/temp/voting_age_csub.csv") %>%
              select(GEOID = geoid, voting_age),
            by = "GEOID") %>%
  left_join(y = read_csv("index/temp/establishments.csv")) %>%
  mutate(fips = GEOID %>% str_sub(1, 5)) %>%
  left_join(by = c("fips" = "GEOID"),
            y = read_csv("index/esri_county.csv")) %>%
  select(-fips) %>%
  write_csv("index/components.csv")
```




## 4.3. Generate Indices 

```{r}

#Grab a list of all census tracts available
read_csv("index/components.csv") %>% 
  # recode any Infinite responses as NA
  mutate_at(vars(telephone, education, ethnicity_similarity, gender_income_similarity,  
                 employment_equality, gini, local_gov, state_gov, fed_gov,
                 lang, non_elder, race_similarity, voting_age, 
                 religious_orgs, 
                 
                 # Unfortunately, these measures are missing too much data (10~50%)
                 # We will use county level measures for these instead
                 advocacy_orgs, civic_orgs, unions, charitable_orgs, 
                 charitable, civic, union, fraternal, politicalacts), 
            funs(if_else(is.infinite(.) | is.nan(.), NA_real_, .))) %>%
  # Now rescale any NEGATIVE effects, by making it 1 minus the fractionalization score,
  # such that 1 now equals homophily and 0 equals heterophily
  # gini index has already been rescaled.
  mutate_at(vars(race_similarity, ethnicity_similarity, 
                 education, employment_equality, gender_income_similarity), funs( (1 - .))) %>%
  # Now rescale ALL variables (with their new polarities) from a min of 0 to a max of 1
  mutate_at(vars(telephone, education, ethnicity_similarity, gender_income_similarity, 
                 gini, local_gov, state_gov, fed_gov,
                 lang, non_elder, race_similarity, voting_age, 
                 religious_orgs, advocacy_orgs, civic_orgs, unions, 
                 charitable_orgs, 
                 
                 charitable, civic, union, fraternal, politicalacts), 
            funs(. %>% scales::rescale())) %>%
  write_csv("index/components_rescaled.csv")
```




```{r, message = FALSE, warning = FALSE}
read_csv("index/components_rescaled.csv") %>%
  mutate(bonding = (telephone + education + ethnicity_similarity + employment_equality +
                      gender_income_similarity + gini + non_elder + 
                      lang + race_similarity) / 9 ) %>%
  mutate(linking = (local_gov + state_gov + fed_gov + politicalacts + voting_age) / 5) %>%
  mutate(bridging = (civic + union + charitable + religious_orgs + fraternal) / 5) %>%
  mutate(social_capital = (bonding + bridging + linking) / 3 ) %>%
  select(GEOID, NAME, social_capital, bonding, bridging, linking) %>%
  write_csv("index/index_county_sub.csv")
```


```{r}
read_csv("index/components_rescaled.csv") %>% 
#36080
  # Zoom into the 5868 county subdivisions in our study
  filter(GEOID %>% str_sub(1,2) %in% c("01", "03", "04", "05", "06", "12", 
                                       "13", "22", "28", "29", "32", "72", "78")) %>% 
    select(-NAME, -GEOID) %>%
    summary()


# Looks like nearly half of these are missing county business patterns data
# charitable acts (3399), unions (1311), civic orgs (2556), and advocacy orgs(3258)
# Reluctantly, I grab county level data for these variables.
```


```{r, message = FALSE, warning=FALSE, eval= FALSE}
set.seed(12345)
read_csv("index/components_rescaled.csv") %>%
  select(-civic_orgs, -charitable_orgs, -advocacy_orgs, -unions) %>%
  as.data.frame() %>%
  # Now exclude any cases from Puerto Rico or the Virgin Islands
  # Which are not frequently measured by the ACS
  filter(!GEOID %>% str_sub(1,2) %in% c("72", "78")) %>%
  Amelia::amelia(m = 5, idvars = c("GEOID", "NAME"), 
               max.resample = 1000, parallel = "multicore",
               bounds = data.frame(column.number = c(1:19) %>% as.numeric,
                                   lower.bound = 0,
                                   upper.bound = 1) %>% 
                 as.matrix()) %>%
  saveRDS(file = "index/mi.RData")
```


```{r}

ob <- readRDS(file = "index/mi.RData")

dir.create("index/imp")

calculate_index = function(data){
  data %>%
  # Pivot longer for easy tidy calculations
  pivot_longer(
    cols = -c(GEOID, NAME),
    names_to = "variable",
    values_to = "value") %>%
  # Classify each variable to be part of an index
  mutate(index = variable %>% recode(
    # Bridging
    "telephone" = "bonding",
    "education" = "bonding",
    "race_similarity" = "bonding",
    "ethnicity_similarity" = "bonding",
    "employment_equality" = "bonding",
    "gender_income_similarity" = "bonding",
    "gini" = "bonding",
    "lang" = "bonding",
    "non_elder" = "bonding",
    # Bridging
    "civic" = "bridging", 
    "union" = "bridging", 
    "charitable" = "bridging", 
    "religious_orgs" = "bridging",
    "fraternal" = "bridging",
    # Linking 
    "voting_age" = "linking",
    "local_gov" = "linking",
    "state_gov" = "linking",
    "fed_gov" = "linking",
    "politicalacts" = "linking")) %>%
  # Calculate the mean score (the index) and standard deviation (error for the index)
  # for each index for each census tract
  group_by(GEOID, NAME, index) %>%
  summarize(mean = mean(value),
            sd = sd(value)) %>%
  # Pivot wider to get separate columns # pivot_wider(
#    id_cols = c(GEOID, NAME),
#    names_from = index,
#    values_from = c(mean, sd)) %>%
  return()
    
}

ob$imputations %>%
  lapply(calculate_index) %>%
  bind_rows(.id = "imp") %>%
  # Pivot wider so that each row is one unique census tract
  pivot_wider(
    id_cols = c(GEOID, NAME, index),
    names_from = imp,
    values_from = c(mean, sd)) %>%
  write_csv("index/imp/imputed_datasets.csv")

```



```{r}

# Now, we want to generate single bonding, bridging, and linking indices,
# combining them and incorporating error within and between imputations 
# by merging them with Rubin's Rules

super_meld = function(data){
  # considering error within imputations and between imputations
  Amelia::mi.meld(
    q = data %>% 
      select(mean_imp1, mean_imp2, 
             mean_imp3, mean_imp4, mean_imp5),
    se = data %>% 
      select(sd_imp1, sd_imp2, 
             sd_imp3, sd_imp4, sd_imp5),
    byrow = FALSE) %>%
    # Bind list results together
    bind_rows() %>%
    # Add back in municipality code
    mutate(geoid = data$GEOID, name = data$NAME) %>%
    select(geoid, name, index = q.mi, index_se = se.mi) %>%
    return()
}

# Calculate Bonding Index
read_csv("index/imp/imputed_datasets.csv") %>%
  filter(index == "bonding") %>%
  super_meld() %>%
  write_csv("index/imp/bonding.csv")

# Calculate Bridging Index
read_csv("index/imp/imputed_datasets.csv") %>%
  filter(index == "bridging") %>%
  super_meld() %>%
  write_csv("index/imp/bridging.csv")

# Calculate Linking Index
read_csv("index/imp/imputed_datasets.csv") %>%
  filter(index == "linking") %>%
  super_meld() %>%
  write_csv("index/imp/linking.csv")


# Now take the mean of all three indices to create an overall social capital index
c("index/imp/bonding.csv", "index/imp/bridging.csv", "index/imp/linking.csv") %>%
  lapply(read_csv) %>%
  bind_rows(.id = "type") %>%
  mutate(type = type %>% recode(
    "1" = "bonding", 
    "2" = "bridging", 
    "3" = "linking")) %>%
  group_by(geoid, name) %>%
  summarize(index = mean(index, na.rm = TRUE)) %>%
  write_csv("index/imp/social_capital.csv")

# Now bind them all into one file
read_csv("index/imp/social_capital.csv") %>%
  rename(social_capital = index) %>%
  left_join(by = "geoid", 
            y = read_csv("index/imp/bonding.csv") %>% 
              select(geoid, bonding = index, bonding_se = index_se)) %>%
    left_join(by = "geoid", 
            y = read_csv("index/imp/bridging.csv") %>% 
              select(geoid, bridging = index, bridging_se = index_se)) %>%
    left_join(by = "geoid", 
            y = read_csv("index/imp/linking.csv") %>% 
              select(geoid, linking = index, linking_se = index_se)) %>%
  write_csv("index/index_county_sub_mi.csv")

remove(ob, calculate_index, super_meld)
```




## 4.4 Other Downloads

```{r}
tidycensus::census_api_key("97b29053a89e5396ef806bf7f4fae05b7387fabd")

dem = function(stateabbr){
  tidycensus::get_acs(
    year = 2018,
    state = stateabbr,
    geography = "county subdivision",
    variables = c(
      # Get population
      pop = "B01003_001",
      # Get median household income
      medincome = "B19013_001"),
    survey = "acs5") %>%
    as.data.frame() %>%
    return()
}
  
library(tidyverse)

# The virgin islands are not always surveyed 
c(state.abb) %>%
  lapply(dem) %>%
  bind_rows() %>%
  # Pivot and edit
  select(-NAME, -moe) %>%
  pivot_wider(
    id_cols = c(GEOID),
    names_from = variable,
    values_from = estimate) %>%
  # Export
  write_csv("index/demographics_csub.csv")
```


### Tally Nodes

```{r}
library(tidyverse)
library(sf)
# This tallies how many nodes were in each analysis

bind_rows(
  readRDS("nodes/hurricane_dorian_florida.csv") %>% 
    as.data.frame() %>% 
    select(code) %>%
    distinct(),
  
  readRDS("nodes/california.csv") %>% 
    as.data.frame() %>% 
    select(code) %>%
    distinct(),
  
  readRDS("nodes/tornado_nashville.csv") %>% 
    as.data.frame() %>% 
    select(code) %>%
    distinct(),
  
  readRDS("nodes/flood_jackson.csv") %>% 
    as.data.frame() %>% 
    select(code) %>%
    distinct(),
  
  readRDS("nodes/earthquake_pr.csv") %>% 
    as.data.frame() %>% 
    select(code) %>%
    distinct()) %>%
  distinct() %>%
  count()
  
  
  
#readRDS("nodes/japan_hagibis.csv") %>% 
#  as.data.frame() %>% 
#  select(muni_code) %>%
#  distinct() %>%
#  count()

```



# 5. Homophily Measures

```{r, message = FALSE, warning = FALSE}
library(tidyverse)
```

Given all the edges in a network, how often do they connect high/low population places? Etc.

```{r}
# Test it out!
#times <- read_csv("aggregate/flood_chiba.csv") %>%
#  select(datetime) %>% unique()

#increment = times$datetime[1]
#file = "flood_chiba.csv"


get_subset = function(increment){

  evac <- read_csv("aggregate/temp.csv") %>%
    # Grab just one time-step
    filter(datetime == increment) %>%
    # Create a measure of evacuation
    mutate(evacuation = if_else(n_crisis - n_baseline > 0, n_crisis - n_baseline, 0)) %>%
    filter(evacuation > 0) %>%
    select(-n_baseline, -n_crisis) %>%
    filter(!is.na(from_code) & !is.na(to_code)) %>%
   # Join in origin traits
    left_join(y = read_csv("nodes/temp.csv") %>%
                mutate_at(vars(social_capital, bonding, bridging, linking),
                          funs(dplyr::ntile(., 2) %>% recode("1" = "Low", "2" = "High"))) %>%
                select(code, from_sc = social_capital, 
                       from_bonding = bonding, 
                       from_bridging = bridging, from_linking = linking),
              by = c("from_code" = "code")) %>%
    left_join(y = read_csv("nodes/temp.csv") %>%
                mutate_at(vars(pop, income),
                          funs(dplyr::ntile(., 2) %>% recode("1" = "Low", "2" = "High"))) %>%
                select(code, from_pop = pop, from_income = income),
              by = c("from_code" = "code")) %>%
    # Join in destination traits
    left_join(y = read_csv("nodes/temp.csv") %>%
                mutate_at(vars(social_capital, bonding, bridging, linking),
                          funs(dplyr::ntile(., 2) %>% recode("1" = "Low", "2" = "High"))) %>%
                select(code, to_sc = social_capital, 
                       to_bonding = bonding, 
                       to_bridging = bridging, to_linking = linking),
              by = c("to_code" = "code")) %>%
    left_join(y = read_csv("nodes/temp.csv") %>%
                mutate_at(vars(pop, income),
                          funs(dplyr::ntile(., 2) %>% recode("1" = "Low", "2" = "High"))) %>%
                select(code, to_pop = pop, to_income = income),
              by = c("to_code" = "code"))
  
  
  compare = function(var){
    evac %>%
      # Get the total edges in the dataset
      #mutate(total_edges = n()) %>%
      group_by(!!sym(paste("from_", var, sep = "")), !!sym(paste("to_", var, sep = ""))) %>%
      summarize(
        # Calculate total evacuees (the sum of edge-weights)
        weight = sum(evacuation, na.rm = TRUE)) %>% ungroup() %>%
      
      mutate(total_weight = sum(weight)) %>%
      mutate(wpercent = weight / total_weight) %>%
      mutate(wpercent = wpercent %>% round(2)) %>%
      mutate(type = var) %>%
      rename(from = !!sym(paste("from_", var, sep = "")),
             to = !!sym(paste("to_", var, sep = ""))) %>%
      return()
  }
  
  
  # Generate stats
  result <- c("pop", "income", "sc", "bonding", "bridging", "linking") %>%
    lapply(compare) %>%
    bind_rows()
  
  if(!is.null(result)){
    result %>%
      mutate(datetime = increment) %>%
      return()
  }  
  
}

```

```{r, message = FALSE, warning = FALSE}


get_results = function(file){
  
  # Gather the appropriate nodelist
  if(file %>% str_detect(pattern = "japan|chiba")){
    readRDS(paste("nodes/", file, sep = "")) %>%
      as.data.frame() %>%
      select(muni_code) %>%
      filter(!is.na(muni_code)) %>%
      unique() %>%
      left_join(by = "muni_code",
                y = read_csv("indices_wide.csv") %>% 
                  select(muni_code, social_capital = social_capital_2017,
                         bonding = bonding_2017, 
                         bridging = bridging_2017, linking = linking_2017)) %>%
      left_join(by = "muni_code", 
                y = read_csv("japan_timeseries_data.csv") %>%
                  filter(year == 2017) %>%
                  select(muni_code, pop, income = income_per_capita)) %>%
      rename(code = muni_code) %>%
      write_csv("nodes/temp.csv")
  }
  
  if(file %>% str_detect(pattern = "jackson|nashville|florida")){
    readRDS(paste("nodes/", file, sep = "")) %>%
      as.data.frame() %>%
      select(code) %>%
      filter(!is.na(code)) %>%
      unique() %>%
      left_join(y = read_csv("index/index_county_sub_mi.csv"),
                by = c("code" = "geoid")) %>%
      left_join(y = read_csv("index/demographics_csub.csv") %>% rename(income = medincome),
                by = c("code" = "GEOID")) %>%
      write_csv("nodes/temp.csv")
  }
      
  
  if(file %>% str_detect(pattern = "cave_fire|hillside|getty|shutoff")){
    readRDS("nodes/california.csv") %>%
      as.data.frame() %>%
      select(code) %>%
      filter(!is.na(code)) %>%
      unique() %>%
      left_join(y = read_csv("index/index_county_sub_mi.csv"),
                by = c("code" = "geoid")) %>%
      left_join(y = read_csv("index/demographics_csub.csv") %>% rename(income = medincome),
                by = c("code" = "GEOID")) %>%
      write_csv("nodes/temp.csv")
  }
  
  # Get the date-and-time range for your edgelist 
  time <- read_csv(paste("aggregate/", file, sep = "")) %>%
    mutate(evacuation = if_else(n_crisis - n_baseline > 0, n_crisis - n_baseline, 0)) %>%
    filter(evacuation > 0) %>%
    select(datetime) %>%
    unique()
  
  
  # Save the edgelist as a specific dataset with the name temp.csv
  read_csv(paste("aggregate/", file, sep = "")) %>%
    write_csv("aggregate/temp.csv")
  
  # Run the analysis on each temporal subset of the data
  time$datetime %>%
    lapply(get_subset) %>%
    bind_rows() %>%
    mutate(event = file) %>%
    return()
  
}

```


```{r, message = FALSE, warning = FALSE}
aggregated_files <- dir("aggregate") %>% 
  as.data.frame() %>% 
  filter(. != "temp.csv") %>%
  unlist() %>% as.character() 


c(
  
  "flood_chiba.csv",
 "japan_hagibis.csv",
 "flood_jackson.csv", "tornado_nashville.csv",
  "southern_california_shutoff.csv", # Looks like this one is better
  "getty_fire.csv",
  "hillside_fire_san_bernardino.csv",
  "cave_fire_santa_barbara.csv"
) %>%
  lapply(get_results) %>%
  bind_rows() %>%
  bind_rows(
    "hurricane_dorian_florida.csv" %>%
      get_results()
  ) %>%
  write_csv("homophily_stats.csv")

remove(get_network, get_results, get_stats, get_subset)
```


```{r}

read_csv("homophily_stats.csv") %>%
  left_join(by = c("event", "datetime"),
            
            # Merge in time
            y = read_csv("homophily_stats.csv") %>%
              select(event, datetime) %>%
              distinct() %>%
              group_by(event) %>%
              arrange(datetime) %>%
              mutate(time = 1:n())) %>%
  arrange(event, time) %>%
  write_csv("homophily_stats.csv")
```




## 5.1 Visualize

```{r}
# The easiest way to do this is
# Get for each date
times <- read_csv("homophily_stats.csv") %>%
  mutate(date = datetime %>% word(1)) %>%
  select(event, date) %>%
  distinct() %>%
  group_by(event) %>%
  mutate(time = 1:n())


# Aggregate from 8-hour to daily level
dat <- read_csv("homophily_stats.csv") %>%
  # Create a daily indicator, for day 1, day 2, etc.
  mutate(date = datetime %>% word(1)) %>%
  left_join(y = times %>% rename(daytime = time),
            by = c("event", "date")) %>%
  # Calculate the total weight per time unit
  group_by(daytime, type, event, from, to) %>%
  summarize(weight = sum(weight, na.rm = TRUE)) %>%
  # Now calculate total weight
  group_by(daytime, type, event) %>%
  mutate(total = sum(weight, na.rm = TRUE)) %>% 
  # Calculate daily percentage
  ungroup() %>%
  mutate(wpercent = weight / total) %>%
  rename(measure = type) %>%
  # Relabel Disaster Events
   # recode names
  mutate(name = event %>% recode_factor(
    "southern_california_shutoff.csv" = "Southern California Shutoff",
    "cave_fire_santa_barbara.csv" = "Cave Fire in Santa Barbara",
    "getty_fire.csv" = "Getty Fire",
    "hillside_fire_san_bernardino.csv" = "Hillside Fire in San Bernardino",
    "earthquake_pr.csv" = "Earthquake in Southern Puerto Rice",
    "hurricane_dorian_florida.csv" = "Hurricane Dorian in Florida",
    "japan_hagibis.csv" = "Typhoon Hagibis in Japan",
    "flood_chiba.csv" = "Flood in Chiba, Japan",
    "flood_jackson.csv" = "Flood in Jackson, Mississippi",
    "tornado_nashville.csv" = "Tornado in Nashville")) %>%
  mutate(type = event %>% recode_factor(
    "hurricane_dorian_florida.csv" = "Storm",
    "japan_hagibis.csv" = "Storm",
    "tornado_nashville.csv" = "Tornado",
    "flood_chiba.csv" = "Flood",
    "flood_jackson.csv" = "Flood",
    "getty_fire.csv" = "Fire",
    "hillside_fire_san_bernardino.csv" = "Fire",
    "cave_fire_santa_barbara.csv" = "Fire",
    "earthquake_pr.csv" = "Earthquake",
    "southern_california_shutoff.csv" = "Power Outage")) %>%
  mutate(area = event %>%  recode_factor(
    "hurricane_dorian_florida.csv" = "South",
    "flood_jackson.csv" = "South",
    "tornado_nashville.csv" = "South",
    "getty_fire.csv" = "California",
    "hillside_fire_san_bernardino.csv" = "California",
    "cave_fire_santa_barbara.csv" = "California",
    "southern_california_shutoff.csv" = "California",
    "earthquake_pr.csv" = "Puerto Rico",
    "japan_hagibis.csv" = "Japan",
    "flood_chiba.csv" = "Japan")) %>%
    mutate(area2 = area %>%  recode_factor(
      "South" = "US",
      "California" = "US",
      "Puerto Rico" = "US",
      "Japan" = "Japan")) %>%
  mutate(measure = measure %>% recode_factor(
    "sc" = "Social Capital",
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    "pop" = "Population",
    "income" = "Income per capita"))
```



### 5.1 Overall Social Capital

```{r}
dat %>% 
  filter(measure == "Social Capital") %>%
  filter(!is.na(from) & !is.na(to)) %>%
  mutate(fromto = paste(from,to, sep = "→")) %>%
  ggplot(mapping = aes(x = daytime, y = wpercent, fill = fromto, group = fromto)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~name) +
  xlim(0, 15) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "plasma",
                               begin = 0.25, end = 0.90) +
 theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(x = "Time (days)", fill = "Moves by Type", 
       y = "(%) Evacuees",
       title = "Evacuation by Social Capital of Origin and Destination")
```

What is note-worthy about this table is that most inter-city evacuation during fires occurred between communities with weak overall social capital. However, this was not true for other disaster types. Hurricane Dorian, the Chiba Floods, the Jackson Flood, and the Tronado in Washington all showed a majority of evacuation between high social capital communities. Only Typhoon Hagibis in Japan was an outlier.


### 5.2 Bonding Social Capital

```{r}
dat %>% 
  filter(measure == "Bonding Social Capital") %>%
  filter(!is.na(from) & !is.na(to)) %>%
  mutate(fromto = paste(from,to, sep = "→")) %>%
  ggplot(mapping = aes(x = daytime, y = wpercent, fill = fromto, group = fromto)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~name) +
  xlim(0, 15) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "plasma",
                               begin = 0.25, end = 0.90) +
  theme(plot.title = element_text(hjust = 0.5)) +
  theme_minimal() +
  labs(x = "Time (days)", fill = "Moves by Type", 
       y = "(%) Evacuees",
       title = "Evacuation by Bonding Social Capital of Origin and Destination")
```

Curiously, almost all evacuees in every documented disaster were moving between communities with low amounts of bonding social capital.
This raises interesting questions. If high overall social capital is related to evacuation in most disasters except fires, but low bonding social capital is related to most all disasters, do bridging and linking social capital make up the difference in effect?


### 5.3 Bridging Social Capital

```{r}
dat %>% 
  filter(measure == "Bridging Social Capital") %>%
  filter(!is.na(from) & !is.na(to)) %>%
  mutate(fromto = paste(from,to, sep = "→")) %>%
  ggplot(mapping = aes(x = daytime, y = wpercent, fill = fromto, group = fromto)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~name) +
  xlim(0, 15) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "plasma",
                               begin = 0.25, end = 0.90) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(x = "Time (days)", fill = "Moves by Type", 
       y = "(%) Evacuees",
       title = "Evacuation by Bridging Social Capital of Origin and Destination")
```

This study finds considerable variation in the effect of bridging social capital on evacuation. For example, all disasters except for the Cave Fire, the Hillside Fire, and Typhoon Hagibis saw a majority of evacuation occurring between communities with high bridging social capital.


### 5.4 Linking Social Capital

```{r}
dat %>% 
  filter(measure == "Linking Social Capital") %>%
  filter(!is.na(from) & !is.na(to)) %>%
  mutate(fromto = paste(from,to, sep = "→")) %>%
  ggplot(mapping = aes(x = daytime, y = wpercent, fill = fromto, group = fromto)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~name) +
  xlim(0, 15) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "plasma",
                               begin = 0.25, end = 0.90) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(x = "Time (days)", fill = "Moves by Type", 
       y = "(%) Evacuees",
       title = "Evacuation by Linking Social Capital of Origin and Destination")
```

For linking social capital, results were more mixed. Hurricane Dorian, the Chiba Floods, the Flood in Jackson, and the Tornado in Nashville saw communities evacuate considerably between high linking capital communities. However, other cases saw a more mixed effect. Most all fires saw high evacuation between low linking capital communities, as did Typhoon Hagibis.


### 5.5 Population

```{r}
dat %>% 
  filter(measure == "Population") %>%
  filter(!is.na(from) & !is.na(to)) %>%
  mutate(fromto = paste(from,to, sep = "→")) %>%
  ggplot(mapping = aes(x = daytime, y = wpercent, fill = fromto, group = fromto)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~name) +
  xlim(0, 15) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "plasma",
                               begin = 0.25, end = 0.90) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(x = "Time (days)", fill = "Moves by Type", 
       y = "(%) Evacuees",
       title = "Evacuation by Population of Origin and Destination")
```


Somewhat unsurprisingly, the majority of evacuation occurs between locales with population over the median. This makes sense, because *more people are in these cities*, and therefore *more people can evacuate*. What is *interesting*, however, is that so many people in urban areas *go to other* urban areas, but not to rural areas.


### 5.6 Income

```{r}
dat %>% 
  filter(measure == "Income per capita") %>%
  filter(!is.na(from) & !is.na(to)) %>%
  mutate(fromto = paste(from,to, sep = "→")) %>%
  ggplot(mapping = aes(x = daytime, y = wpercent, fill = fromto, group = fromto)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~name) +
  xlim(0, 15) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "plasma",
                               begin = 0.25, end = 0.90) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(x = "Time (days)", fill = "Moves by Type", 
       y = "(%) Evacuees",
       title = "Evacuation by Income per capita of Origin and Destination")
```

This chart shows that for most disasters studied, the majority of evacuees travelled between wealthier areas. This makes sense; evacuation is harder when financial resources are scarce. However, some disasters show significant amounts of evacuation among poorer communities. This occurred during the Southern California Shutoff, the Getty Fire, the Flood in Jackson, Mississippi, and the Tornado in Nashville.  Finally, a few interesting details remain. After Typhoon Hagibis, the Hillside Fire, and the Tornado in Nashville, evacuees moved most frequently between high income communities. Meanwhile, evacuation between low income communities was delayed by several days. This highlights the first 72-hours of any disaster, that many city officials highlight as critical to saving lives, because *most* members of the population are rendered immobile and struggle to access resources during this time.


### 5.7 Final Chart

Let's recreate these charts, but in aggregate form so as to save space.

```{r, fig.width= 9.5}
dat %>%
  group_by(measure, name, from, to) %>%
  summarize(weight = sum(weight, na.rm = TRUE)) %>%
  filter(!is.na(from) & !is.na(to)) %>% ungroup() %>%
  mutate(fromto = paste(from,to, sep = "→")) %>%
  mutate(measure = measure %>% recode_factor(
   "Population" =  "Population", 
   "Income per capita" = "Income per capita", 
   "Social Capital" = "Social Capital",
    "Bonding Social Capital" = "Bonding \n Social Capital",
   "Bridging Social Capital" = "Bridging \n Social Capital",
   "Linking Social Capital" = "Linking \n Social Capital")) %>%
  ggplot(mapping = aes(x = reorder(name, desc(name)), y = weight, fill = fromto, group = fromto)) +
  geom_bar(position="fill", stat="identity") +
  facet_wrap(~measure) +
  coord_flip() +
  scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), labels = c(0, 25, 50, 75, 100)) +
  viridis::scale_fill_viridis(discrete = TRUE, option = "plasma",
                               begin = 0.25, end = 0.90) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5),
        panel.spacing = unit(0.25, "cm"),
        axis.ticks = element_blank(),
        panel.grid = element_blank(),
        plot.caption = element_text(hjust = 0.5),
        legend.position = "bottom") +
  labs(x = "", fill = "Moves by City Demographic Type", 
       y = "(%) Evacuees",
       title = "Evacuation by Origin and Destination Demographics",
       caption = "High refers to cities with demographic above median; low refers to below median. \n Bars represent share of evacuees who when between each kind of city pair.") +
  guides(fill = guide_legend(reverse = TRUE))


```

## 5.8 Statistical Analysis

Why do some communities see greater homophily by social capital than others? (for example)


```{r}
dat <- read_csv("homophily_stats.csv") %>%
      rename(measure = type) %>%
  # Relabel Disaster Events
   # recode names
  mutate(name = event %>% recode_factor(
    "southern_california_shutoff.csv" = "Southern California Shutoff",
    "cave_fire_santa_barbara.csv" = "Cave Fire in Santa Barbara",
    "getty_fire.csv" = "Getty Fire",
    "hillside_fire_san_bernardino.csv" = "Hillside Fire in San Bernardino",
    "earthquake_pr.csv" = "Earthquake in Southern Puerto Rice",
    "hurricane_dorian_florida.csv" = "Hurricane Dorian in Florida",
    "japan_hagibis.csv" = "Typhoon Hagibis in Japan",
    "flood_chiba.csv" = "Flood in Chiba, Japan",
    "flood_jackson.csv" = "Flood in Jackson, Mississippi",
    "tornado_nashville.csv" = "Tornado in Nashville")) %>%
  mutate(type = event %>% recode_factor(
    "hurricane_dorian_florida.csv" = "Storm",
    "japan_hagibis.csv" = "Storm",
    "tornado_nashville.csv" = "Tornado",
    "flood_chiba.csv" = "Flood",
    "flood_jackson.csv" = "Flood",
    "getty_fire.csv" = "Fire",
    "hillside_fire_san_bernardino.csv" = "Fire",
    "cave_fire_santa_barbara.csv" = "Fire",
    "earthquake_pr.csv" = "Earthquake",
    "southern_california_shutoff.csv" = "Power Outage")) %>%
  mutate(area = event %>%  recode_factor(
    "hurricane_dorian_florida.csv" = "South",
    "flood_jackson.csv" = "South",
    "tornado_nashville.csv" = "South",
    "getty_fire.csv" = "California",
    "hillside_fire_san_bernardino.csv" = "California",
    "cave_fire_santa_barbara.csv" = "California",
    "southern_california_shutoff.csv" = "California",
    "earthquake_pr.csv" = "Puerto Rico",
    "japan_hagibis.csv" = "Japan",
    "flood_chiba.csv" = "Japan")) %>%
    mutate(area2 = area %>%  recode_factor(
      "South" = "US",
      "California" = "US",
      "Puerto Rico" = "US",
      "Japan" = "Japan")) %>%
  mutate(measure = measure %>% recode_factor(
    "sc" = "Social Capital",
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    "pop" = "Population",
    "income" = "Income per capita")) %>%
  mutate(hour = datetime %>% word(2, sep = " ") %>% str_sub(1,2)) %>%
  # Set power outages to be the baseline, since it is the most unusual,
  # and we really want to know the effect of disasters like storms on evacuation
  # Also, earthquakes aren't available for demographic comparisons
  mutate(type = type %>% factor %>% relevel(ref = "Power Outage"))   
```


```{r}

m1 <- dat %>%
  filter(measure == "Social Capital") %>%
  filter(from == "High", to == "High") %>%
  mutate(area2 = area2 %>% factor() %>% relevel(ref = "Japan")) %>%
  lm(formula = wpercent ~ area2 + type + time + hour)


m2 <- dat %>%
  filter(measure == "Bonding Social Capital") %>%
  filter(from == "High", to == "High") %>%
  mutate(area2 = area2 %>% factor() %>% relevel(ref = "Japan")) %>%
  lm(formula = wpercent ~ area2 + type + time + hour) 

m3 <- dat %>%
  filter(measure == "Bridging Social Capital") %>%
  filter(from == "High", to == "High") %>%
  mutate(area2 = area2 %>% factor() %>% relevel(ref = "Japan")) %>%
  lm(formula = wpercent ~ area2 + type + time + hour) 

m4 <- dat %>%
  filter(measure == "Linking Social Capital") %>%
  filter(from == "High", to == "High") %>%
  mutate(area2 = area2 %>% factor() %>% relevel(ref = "Japan")) %>%
  lm(formula = wpercent ~ area2 + type + time + hour) 

m5 <- dat %>%
  filter(measure == "Income per capita") %>%
  filter(from == "High", to == "High") %>%
  mutate(area2 = area2 %>% factor() %>% relevel(ref = "Japan")) %>%
  lm(formula = wpercent ~ area2 + type + time + hour) 

m6 <- dat %>%
  filter(measure == "Population") %>%
  filter(from == "High", to == "High") %>%
  mutate(area2 = area2 %>% factor() %>% relevel(ref = "Japan")) %>%
  lm(formula = wpercent ~ area2 + type + time + hour) 
# install.packages("texreg")

texreg::htmlreg(
  list(m1,m2,m3,m4,m5,m6),
  custom.model.names = c("Social Capital", "Bonding Social Capital",
                         "Bridging Social Capital", "Linking Social Capital",
                         "Income per capita", "Population"),
  override.se = list(robust(m1)$std_error, robust(m2)$std_error, robust(m3)$std_error, 
                     robust(m4)$std_error, robust(m5)$std_error, robust(m6)$std_error),
  override.pvalues = list(robust(m1)$p_value, robust(m2)$p_value, robust(m3)$p_value, 
                          robust(m4)$p_value, robust(m5)$p_value, robust(m6)$p_value),
  custom.coef.map = list(
    "typeStorm" = "Storm",
    "typeTornado" = "Tornado",
    "typeFlood" = "Flood",
    "typeFire" = "Fire",
    "area2US" = "US",
    "time" = "Time",
    "hour08" = "Day Time",
    "hour16" = "Night Time",
    "(Intercept)" = "Constant"),
  custom.note = "Statistically significant effects depicted by *** at the p < 0.001 level, ** at the p < 0.01 level, and * at the p < 0.05 level.",
  file = "table_2.html",
  bold = 0.05,
  digits = 2)

```

```{r}

list(m1, m2, m3, m4, m5, m6) %>%
  lapply(robust) %>%
  bind_rows(.id = "outcome") %>%
  mutate(outcome = outcome %>% recode_factor(
    "4" = "Linking Social Capital",
    "3" = "Bridging Social Capital",
    "2" = "Bonding Social Capital",
    "1" = "Social Capital",
    "5" = "Income per capita",
    "6" = "Population")) %>%
  mutate(statistic = if_else(p_value < 0.05, statistic, NA_real_)) %>%
  mutate(term = term %>% recode_factor(
    "typeStorm" = "Storm",
    "typeTornado" = "Tornado",
    "typeFlood" = "Flood",
    "typeFire" = "Fire",
    "area2US" = "US",
    "time" = "Time",
    "hour08" = "Daytime",
    "hour16" = "Nightime",
    "(Intercept)" = "Constant")) %>%
  filter(term %in% c("Fire", "Flood", "Tornado", "Storm")) %>%
  ggplot(mapping = aes(x = term, y = outcome, fill = statistic, label = estimate %>% round(2))) +
  geom_tile() +
  geom_text() +
  scale_fill_gradient(low = "azure2", high = "steelblue", na.value = "white") +
#  viridis::scale_fill_viridis(na.value = "white",  end = 0.95) +
  # scale_fill_gradient2(midpoint = 0, low = "firebrick", mid = "white", high = "steelblue", na.value = "white") +
  # make a fancy think colorbar
  guides(fill = guide_colourbar(barwidth = 0.5, barheight = 10)) +
  # center plot title and caption
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.caption = element_text(hjust = 0.5),
         strip.background = element_rect(fill=NA,colour=NA),
      panel.spacing=unit(0.25,"cm"), 
      panel.grid.major = element_blank(), 
      panel.grid.minor = element_blank(),
      axis.title.y = element_blank(),
      axis.ticks = element_blank()
    ) +
  # Label plot
  labs(
    title = "Effect of Disaster Type on Evacuation Network Homophily",
    x = "Disaster Type", y = "", color = "t-Statistic",
    caption = c("Tiles are colored by the size of their standardized test statistic. Labels reflect the beta coefficient of each statistic. \n Tiles in white are not statistically significant."))


```


# 6. Chord Diagrams


```{r, result = FALSE}
library(sf)
library(tidygraph)

dir.create("network")

get_aggregate = function(file){

# Relabel timing of days
timing <- read_csv(paste("aggregate/", file, ".csv", sep = "")) %>% 
  select(datetime) %>% 
  unique() %>%
  arrange(datetime) %>%
  mutate(time = c(1:n()) %>% as.numeric())

# Generate aggregated network over 2 weeks
read_csv(paste("aggregate/", file, ".csv", sep = "")) %>%
  filter(!is.na(from_code) & !is.na(to_code)) %>%
  mutate(evacuation = if_else(n_crisis - n_baseline > 0, n_crisis - n_baseline, 0)) %>%
  # Merge in timing and filter to just first 42 periods (14 days)
  left_join(by = "datetime", y = timing) %>%
  filter(time %in% 1:42) %>% 
  # aggregate to entire first 2 weeks
  group_by(from_code, to_code) %>%
  summarize(evacuation = sum(evacuation, na.rm = TRUE)) %>%
  select(from_code, to_code, evacuation) %>%
  filter(evacuation > 0) %>%
  write_csv(paste("network/edgelist_", file, ".csv", sep = ""))

}

# Generate aggregated-edgelists for each disaster (except Puerto Rico's)
# (we don't have indices with which to evaluate PR)
c("japan_hagibis", "flood_chiba", "getty_fire", 
  "southern_california_shutoff", "tornado_nashville", 
  "flood_jackson", "hillside_fire_san_bernardino",
  "cave_fire_santa_barbara", "hurricane_dorian_florida") %>%
  lapply(get_aggregate)
```



```{r}
dir.create("networkfiles")



get_network = function(file){
  
  # Get graph object
  g <- tidygraph::tbl_graph(
    edges = read_csv(paste("network/edgelist_", file, ".csv", sep = "")) %>%
      mutate_at(vars(from_code, to_code), funs(. %>% as.character())),
    nodes = data.frame(code = c(
      read_csv(paste("network/edgelist_", file, ".csv", sep = ""))$from_code,
      read_csv(paste("network/edgelist_", file, ".csv", sep = ""))$to_code) %>% unique(), 
      stringsAsFactors = FALSE) %>% mutate(code = as.character(code)),
    directed = TRUE) %>%
    activate(nodes)
  
  # If the file is a Japanese case
  if(file %in% c("japan_hagibis", "flood_chiba")){
    g <- g %>%
      # Join in social capital traits from Japanese index
      left_join(y = read_csv("indices_wide.csv") %>%
                  select(muni_code,
                         social_capital = social_capital_2017, 
                         bonding = bonding_2017, bridging = bridging_2017, 
                         linking = linking_2017),
                by = c("code" = "muni_code"))
  }
  
  # If the file is a US case
  if(!file %in% c("japan_hagibis", "flood_chiba")){
    g <- g %>%
      # Join in social capital traits from US index
      left_join(y = read_csv("index/index_county_sub_mi.csv") %>% 
                  select(geoid, social_capital, bonding, bridging, linking),
                by = c("code" = "geoid"))
  }
  
  
  g %>%
    activate(edges) %>%
    # Arrange
    arrange(desc(evacuation)) %>%
    # Now add in the social capital traits from nodes to classify edges
    # categorizing them into high or low
    mutate(to_social_capital = .N()$social_capital[to] %>% 
             ntile(2) %>% recode("1" = "Low", "2" = "High"),
           from_social_capital = .N()$social_capital[from] %>% 
             ntile(2) %>% recode("1" = "Low", "2" = "High")) %>%
    mutate(type = paste(from_social_capital, to_social_capital, sep = "→")) %>%
    mutate(type = if_else(type %>% str_detect(pattern = "NA"), NA_character_, type)) %>%
    filter(!is.na(type)) %>%
    # Calculate degree
    tidygraph::activate(nodes) %>%
    mutate(Indegree = tidygraph::centrality_degree(weights = evacuation, mode = "in")) %>%
    saveRDS(paste("networkfiles/", file, ".rds", sep = ""))
}

# Generate the tidygraph network for each of these disasters
c("japan_hagibis", "flood_chiba", "getty_fire", 
  "southern_california_shutoff",  "tornado_nashville", 
  "flood_jackson", "hillside_fire_san_bernardino",
  "cave_fire_santa_barbara", "hurricane_dorian_florida") %>%
lapply(get_network)

```



```{r}
library(tidygraph)
library(ggraph)
# Get graph object

chord = function(data){
  require(tidygraph)
  require(ggraph)
  data %>%
    # Create chord plot
    ggraph(layout = "linear", circular = TRUE, sort.by = social_capital) +
    geom_edge_arc(aes(color = type), alpha = 0.5) + 
    # ad
    geom_node_point(aes(size = Indegree), color = "grey", alpha = 0.5) + 
    theme_graph() +
    scale_edge_colour_viridis(
      discrete = TRUE,
      option = "plasma", begin = 0.0, end = 0.8)
}

read_better = function(file){
  require(tidygraph)
  readRDS(paste("networkfiles/", file, ".rds", sep = "")) %>%
#    activate(nodes) %>%
#    mutate(type = file) %>%
    return()
}

visualize = function(file){

file %>% read_better()  %>%
  chord() %>%
  ggsave(filename = paste("networkimage/", file, ".png", sep = ""), dpi = 500)
  
}
```


```{r}
dir.create("networkimage")

c("japan_hagibis", "flood_chiba", "getty_fire", 
  # "southern_california_shutoff",  
  "tornado_nashville", 
  "flood_jackson", "hillside_fire_san_bernardino",
  "cave_fire_santa_barbara"
  #"hurricane_dorian_florida"
  ) %>%
  lapply(visualize)

"hurricane_dorian_florida" %>% visualize()

"southern_california_shutoff" %>% visualize()
```






```{r}
library(tidygraph)
library(sf)

g <- tidygraph::tbl_graph(
  edges = read_csv("network/edgelist_japan_hagibis.csv"),
  nodes = readRDS("nodes/japan_hagibis.csv") %>% 
    as.data.frame() %>% 
    select(muni_code, muni, pref) %>%
     distinct() %>%
    mutate(id = 1:n()) %>%
    left_join(y = read_csv("indices_wide.csv") %>%
                select(muni_code,
                       social_capital = social_capital_2017, 
                       bonding = bonding_2017, bridging = bridging_2017, 
                       linking = linking_2017),
              by = c("muni_code")) %>%
    mutate(muni = muni %>% word(2)) %>%
    # Filter to just those actually in the edgelist
    filter(muni_code %in% unique(
      c(read_csv("network/edgelist_japan_hagibis.csv")$from_code,
        read_csv("network/edgelist_japan_hagibis.csv")$to_code))), 
  directed = TRUE) %>%
  activate(edges) %>% 
  arrange(desc(evacuation)) %>%
  # Now add in the social capital traits from nodes to classify edges
  # categorizing them into high or low
  mutate(to_social_capital = .N()$social_capital[to] %>% 
           ntile(2) %>% recode("1" = "Low", "2" = "High"),
         from_social_capital = .N()$social_capital[from] %>% 
           ntile(2) %>% recode("1" = "Low", "2" = "High")) %>%
  mutate(type = paste(from_social_capital, to_social_capital, sep = "→")) %>%
  mutate(type = if_else(type %>% str_detect(pattern = "NA"), NA_character_, type)) %>%
  filter(!is.na(type)) %>%
    # Calculate degree
  activate(nodes) %>%
  mutate(centrality = tidygraph::centrality_degree(weights = evacuation, mode = "in")) 


g


  # plot
  ggraph(layout = 'linear', circular = FALSE) + 
    geom_edge_arc(mapping = aes(color = factor(type)), alpha = 0.5) +
#  geom_conn_bundle(data = get_con(from = from, to = to), alpha=0.2, width=0.9, aes(color=type)) +
  geom_node_point(aes(size=centrality, alpha=0.2)) +
  scale_size_continuous( range = c(0.1,10) ) 


g %>%
  # Calculate degree
  tidygraph::activate(nodes) %>%
  mutate(centrality = tidygraph::centrality_degree(weights = evacuation, mode = "in")) %>%
  # plot
  ggraph(layout = 'stress') + 
    geom_edge_arc(mapping = aes(color = factor(type)), alpha = 0.5) +
#  geom_conn_bundle(data = get_con(from = from, to = to), alpha=0.2, width=0.9, aes(color=type)) +
  geom_node_point(aes(size=centrality, alpha=0.2)) +
  scale_size_continuous( range = c(0.1,10) ) +
  theme_void()

```





# 6. Chord Diagrams

```{r}
# Test it out!
#times <- read_csv("aggregate/flood_chiba.csv") %>%
#  select(datetime) %>% unique()

#increment = times$datetime[1]
#file = "flood_chiba.csv"


get_subset = function(increment){

  evac <- read_csv("aggregate/temp.csv") %>%
    # Grab just one time-step
    filter(datetime == increment) %>%
    # Create a measure of evacuation
    mutate(evacuation = if_else(n_crisis - n_baseline > 0, n_crisis - n_baseline, 0)) %>%
    filter(evacuation > 0) %>%
    select(-n_baseline, -n_crisis) %>%
    filter(!is.na(from_code) & !is.na(to_code)) %>%
   # Join in origin traits
    left_join(y = read_csv("nodes/temp.csv") %>%
                mutate_at(vars(social_capital, bonding, bridging, linking),
                          funs(dplyr::ntile(., 5))) %>%
                select(code, from_sc = social_capital, 
                       from_bonding = bonding, 
                       from_bridging = bridging, from_linking = linking),
              by = c("from_code" = "code")) %>%
    left_join(y = read_csv("nodes/temp.csv") %>%
                mutate_at(vars(pop, income),
                          funs(dplyr::ntile(., 5))) %>%
                select(code, from_pop = pop, from_income = income),
              by = c("from_code" = "code")) %>%
    # Join in destination traits
    left_join(y = read_csv("nodes/temp.csv") %>%
                mutate_at(vars(social_capital, bonding, bridging, linking),
                          funs(dplyr::ntile(., 5))) %>%
                select(code, to_sc = social_capital, 
                       to_bonding = bonding, 
                       to_bridging = bridging, to_linking = linking),
              by = c("to_code" = "code")) %>%
    left_join(y = read_csv("nodes/temp.csv") %>%
                mutate_at(vars(pop, income),
                          funs(dplyr::ntile(., 5))) %>%
                select(code, to_pop = pop, to_income = income),
              by = c("to_code" = "code"))
  
  
  compare = function(var){
    evac %>%
      # Get the total edges in the dataset
      #mutate(total_edges = n()) %>%
      group_by(!!sym(paste("from_", var, sep = "")), !!sym(paste("to_", var, sep = ""))) %>%
      summarize(
        # Calculate total evacuees (the sum of edge-weights)
        weight = sum(evacuation, na.rm = TRUE)) %>% ungroup() %>%
      
      mutate(total_weight = sum(weight)) %>%
      mutate(wpercent = weight / total_weight) %>%
      mutate(wpercent = wpercent %>% round(2)) %>%
      mutate(type = var) %>%
      rename(from = !!sym(paste("from_", var, sep = "")),
             to = !!sym(paste("to_", var, sep = ""))) %>%
      return()
  }
  
  
  # Generate stats
  result <- c("pop", "income", "sc", "bonding", "bridging", "linking") %>%
    lapply(compare) %>%
    bind_rows()
  
  if(!is.null(result)){
    result %>%
      mutate(datetime = increment) %>%
      return()
  }  
  
}

```

```{r, message = FALSE, warning = FALSE}


get_results = function(file){
  
  # Gather the appropriate nodelist
  if(file %>% str_detect(pattern = "japan|chiba")){
    readRDS(paste("nodes/", file, sep = "")) %>%
      as.data.frame() %>%
      select(muni_code) %>%
      filter(!is.na(muni_code)) %>%
      unique() %>%
      left_join(by = "muni_code",
                y = read_csv("indices_wide.csv") %>% 
                  select(muni_code, social_capital = social_capital_2017,
                         bonding = bonding_2017, 
                         bridging = bridging_2017, linking = linking_2017)) %>%
      left_join(by = "muni_code", 
                y = read_csv("japan_timeseries_data.csv") %>%
                  filter(year == 2017) %>%
                  select(muni_code, pop, income = income_per_capita)) %>%
      rename(code = muni_code) %>%
      write_csv("nodes/temp.csv")
  }
  
  if(file %>% str_detect(pattern = "jackson|nashville|florida")){
    readRDS(paste("nodes/", file, sep = "")) %>%
      as.data.frame() %>%
      select(code) %>%
      filter(!is.na(code)) %>%
      unique() %>%
      left_join(y = read_csv("index/index_county_sub_mi.csv"),
                by = c("code" = "geoid")) %>%
      left_join(y = read_csv("index/demographics_csub.csv") %>% rename(income = medincome),
                by = c("code" = "GEOID")) %>%
      write_csv("nodes/temp.csv")
  }
      
  
  if(file %>% str_detect(pattern = "cave_fire|hillside|getty|shutoff")){
    readRDS("nodes/california.csv") %>%
      as.data.frame() %>%
      select(code) %>%
      filter(!is.na(code)) %>%
      unique() %>%
      left_join(y = read_csv("index/index_county_sub_mi.csv"),
                by = c("code" = "geoid")) %>%
      left_join(y = read_csv("index/demographics_csub.csv") %>% rename(income = medincome),
                by = c("code" = "GEOID")) %>%
      write_csv("nodes/temp.csv")
  }
  
  # Get the date-and-time range for your edgelist 
  time <- read_csv(paste("aggregate/", file, sep = "")) %>%
    mutate(evacuation = if_else(n_crisis - n_baseline > 0, n_crisis - n_baseline, 0)) %>%
    filter(evacuation > 0) %>%
    select(datetime) %>%
    unique()
  
  
  # Save the edgelist as a specific dataset with the name temp.csv
  read_csv(paste("aggregate/", file, sep = "")) %>%
    write_csv("aggregate/temp.csv")
  
  # Run the analysis on each temporal subset of the data
  time$datetime %>%
    lapply(get_subset) %>%
    bind_rows() %>%
    mutate(event = file) %>%
    return()
  
}

```


```{r, message = FALSE, warning = FALSE}
aggregated_files <- dir("aggregate") %>% 
  as.data.frame() %>% 
  filter(. != "temp.csv") %>%
  unlist() %>% as.character() 

c(
  
  "flood_chiba.csv",
 "japan_hagibis.csv",
 "flood_jackson.csv", "tornado_nashville.csv",
  "southern_california_shutoff.csv", # Looks like this one is better
  "getty_fire.csv",
  "hillside_fire_san_bernardino.csv",
  "cave_fire_santa_barbara.csv"
) %>%
  lapply(get_results) %>%
  bind_rows() %>%
  bind_rows(
    "hurricane_dorian_florida.csv" %>%
      get_results()
  ) %>%
  write_csv("homophily_stats_quintiles.csv")

remove(get_network, get_results, get_stats, get_subset)
```


```{r}

read_csv("homophily_stats_quintiles.csv") %>%
  left_join(by = c("event", "datetime"),
            
            # Merge in time
            y = read_csv("homophily_stats_quintiles.csv") %>%
              select(event, datetime) %>%
              distinct() %>%
              group_by(event) %>%
              arrange(datetime) %>%
              mutate(time = 1:n())) %>%
  arrange(event, time) %>%
  write_csv("homophily_stats_quintiles.csv")
```

## 6.2 Visualize

```{r}
read_csv("homophily_stats_quintiles.csv") %>%
  rename(measure = type) %>%
  mutate(name = event %>% recode_factor(
    "southern_california_shutoff.csv" = "Southern California Shutoff",
    "cave_fire_santa_barbara.csv" = "Cave Fire in Santa Barbara",
    "getty_fire.csv" = "Getty Fire",
    "hillside_fire_san_bernardino.csv" = "Hillside Fire in San Bernardino",
    "earthquake_pr.csv" = "Earthquake in Southern Puerto Rice",
    "hurricane_dorian_florida.csv" = "Hurricane Dorian in Florida",
    "japan_hagibis.csv" = "Typhoon Hagibis in Japan",
    "flood_chiba.csv" = "Flood in Chiba, Japan",
    "flood_jackson.csv" = "Flood in Jackson, Mississippi",
    "tornado_nashville.csv" = "Tornado in Nashville")) %>%
  mutate(type = event %>% recode_factor(
    "hurricane_dorian_florida.csv" = "Storm",
    "japan_hagibis.csv" = "Storm",
    "tornado_nashville.csv" = "Tornado",
    "flood_chiba.csv" = "Flood",
    "flood_jackson.csv" = "Flood",
    "getty_fire.csv" = "Fire",
    "hillside_fire_san_bernardino.csv" = "Fire",
    "cave_fire_santa_barbara.csv" = "Fire",
    "earthquake_pr.csv" = "Earthquake",
    "southern_california_shutoff.csv" = "Power Outage")) %>%
  mutate(area = event %>%  recode_factor(
    "hurricane_dorian_florida.csv" = "South",
    "flood_jackson.csv" = "South",
    "tornado_nashville.csv" = "South",
    "getty_fire.csv" = "California",
    "hillside_fire_san_bernardino.csv" = "California",
    "cave_fire_santa_barbara.csv" = "California",
    "southern_california_shutoff.csv" = "California",
    "earthquake_pr.csv" = "Puerto Rico",
    "japan_hagibis.csv" = "Japan",
    "flood_chiba.csv" = "Japan")) %>%
  mutate(area2 = area %>%  recode_factor(
    "South" = "US",
    "California" = "US",
    "Puerto Rico" = "US",
    "Japan" = "Japan")) %>%
  mutate(measure = measure %>% recode_factor(
    "sc" = "Social Capital",
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    "pop" = "Population",
    "income" = "Income per capita")) %>%
  filter(!is.na(from) & !is.na(to)) %>%
  mutate(from = from %>% recode_factor(
    "5" = "80-100",
    "4" = "60-80",
    "3" = "40-60",
    "2" = "20-40",
    "1" = "0-20"),
    to = to %>% recode_factor(
      "5" = "80-100",
      "4" = "60-80",
      "3" = "40-60",
      "2" = "20-40",
      "1" = "0-20")) %>%
  write_csv("homophily_stats_quintiles_ready.csv")

```

```{r}
# Let's summarize this into a single data.frame
library(circlize)

# Take first 42 8-hour sets, aka 14 days
dat <- read_csv("homophily_stats_quintiles_ready.csv") %>%
  filter(time %in% 1:42) %>%
  filter(name == "Hurricane Dorian in Florida",
         measure == "Social Capital") 
  #group_by(measure, name, from, to) %>%
  #summarize(weight = sum(weight, na.rm = TRUE)) 

dat
```





